{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.999366420274551, "global_step": 8281, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25816988945007324, "epoch": 0.0, "learning_rate": 4.9993962081874176e-05, "loss": 0.2805, "step": 1, "task_loss": 0.5853821039199829 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18578635156154633, "epoch": 0.0, "learning_rate": 4.998792416374834e-05, "loss": 0.1863, "step": 2, "task_loss": 0.16780924797058105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2412126362323761, "epoch": 0.0, "learning_rate": 4.998188624562251e-05, "loss": 0.4082, "step": 3, "task_loss": 0.380114883184433 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2583884596824646, "epoch": 0.0, "learning_rate": 4.9975848327496685e-05, "loss": 0.3689, "step": 4, "task_loss": 0.6353768110275269 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3934633433818817, "epoch": 0.0, "learning_rate": 4.996981040937085e-05, "loss": 0.4124, "step": 5, "task_loss": 0.24388056993484497 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16049812734127045, "epoch": 0.01, "learning_rate": 4.996377249124502e-05, "loss": 0.3367, "step": 6, "task_loss": 0.5224175453186035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27577707171440125, "epoch": 0.01, "learning_rate": 4.995773457311919e-05, "loss": 0.3457, "step": 7, "task_loss": 0.41049373149871826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28069770336151123, "epoch": 0.01, "learning_rate": 4.995169665499336e-05, "loss": 0.4221, "step": 8, "task_loss": 0.16821245849132538 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31688547134399414, "epoch": 0.01, "learning_rate": 4.994565873686753e-05, "loss": 0.4094, "step": 9, "task_loss": 0.3438544273376465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37644460797309875, "epoch": 0.01, "learning_rate": 4.99396208187417e-05, "loss": 0.285, "step": 10, "task_loss": 0.2462257742881775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1935107409954071, "epoch": 0.01, "learning_rate": 4.993358290061587e-05, "loss": 0.3464, "step": 11, "task_loss": 0.42837294936180115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3776441216468811, "epoch": 0.01, "learning_rate": 4.9927544982490036e-05, "loss": 0.367, "step": 12, "task_loss": 0.40439867973327637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40584027767181396, "epoch": 0.01, "learning_rate": 4.992150706436421e-05, "loss": 0.3143, "step": 13, "task_loss": 0.5767415761947632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37956827878952026, "epoch": 0.01, "learning_rate": 4.9915469146238384e-05, "loss": 0.2916, "step": 14, "task_loss": 0.5369002223014832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3782750368118286, "epoch": 0.01, "learning_rate": 4.9909431228112544e-05, "loss": 0.4942, "step": 15, "task_loss": 0.5017585754394531 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23874841630458832, "epoch": 0.01, "learning_rate": 4.990339330998672e-05, "loss": 0.2912, "step": 16, "task_loss": 0.7724030017852783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3366963863372803, "epoch": 0.01, "learning_rate": 4.989735539186089e-05, "loss": 0.3094, "step": 17, "task_loss": 1.3630739450454712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4486166834831238, "epoch": 0.02, "learning_rate": 4.989131747373506e-05, "loss": 0.503, "step": 18, "task_loss": 0.6402081251144409 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30841130018234253, "epoch": 0.02, "learning_rate": 4.9885279555609226e-05, "loss": 0.289, "step": 19, "task_loss": 1.072608232498169 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43669694662094116, "epoch": 0.02, "learning_rate": 4.98792416374834e-05, "loss": 0.525, "step": 20, "task_loss": 1.057503581047058 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20193564891815186, "epoch": 0.02, "learning_rate": 4.987320371935757e-05, "loss": 0.338, "step": 21, "task_loss": 0.7899333238601685 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4605207145214081, "epoch": 0.02, "learning_rate": 4.9867165801231735e-05, "loss": 0.3432, "step": 22, "task_loss": 0.7641338109970093 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16217002272605896, "epoch": 0.02, "learning_rate": 4.986112788310591e-05, "loss": 0.3753, "step": 23, "task_loss": 0.7667312622070312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32749849557876587, "epoch": 0.02, "learning_rate": 4.9855089964980076e-05, "loss": 0.3067, "step": 24, "task_loss": 0.1911318153142929 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3496509790420532, "epoch": 0.02, "learning_rate": 4.984905204685424e-05, "loss": 0.4657, "step": 25, "task_loss": 0.9077955484390259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28667593002319336, "epoch": 0.02, "learning_rate": 4.984301412872842e-05, "loss": 0.4758, "step": 26, "task_loss": 0.7345035076141357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29842638969421387, "epoch": 0.02, "learning_rate": 4.9836976210602584e-05, "loss": 0.3211, "step": 27, "task_loss": 1.0764777660369873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1474331021308899, "epoch": 0.02, "learning_rate": 4.983093829247676e-05, "loss": 0.2171, "step": 28, "task_loss": 0.1873449832201004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.309543639421463, "epoch": 0.02, "learning_rate": 4.9824900374350925e-05, "loss": 0.3459, "step": 29, "task_loss": 1.0530891418457031 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6381558179855347, "epoch": 0.03, "learning_rate": 4.98188624562251e-05, "loss": 0.4117, "step": 30, "task_loss": 0.6502270102500916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3867588937282562, "epoch": 0.03, "learning_rate": 4.9812824538099266e-05, "loss": 0.3533, "step": 31, "task_loss": 0.5898758172988892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23949167132377625, "epoch": 0.03, "learning_rate": 4.9806786619973434e-05, "loss": 0.3424, "step": 32, "task_loss": 0.5812395215034485 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22307108342647552, "epoch": 0.03, "learning_rate": 4.980074870184761e-05, "loss": 0.4721, "step": 33, "task_loss": 0.9143725037574768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.689225971698761, "epoch": 0.03, "learning_rate": 4.9794710783721775e-05, "loss": 0.4365, "step": 34, "task_loss": 0.7707207202911377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.511667013168335, "epoch": 0.03, "learning_rate": 4.978867286559594e-05, "loss": 0.4662, "step": 35, "task_loss": 0.6621748208999634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1682216227054596, "epoch": 0.03, "learning_rate": 4.9782634947470116e-05, "loss": 0.3645, "step": 36, "task_loss": 0.3129403591156006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27336210012435913, "epoch": 0.03, "learning_rate": 4.977659702934428e-05, "loss": 0.3905, "step": 37, "task_loss": 0.842422366142273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4350525736808777, "epoch": 0.03, "learning_rate": 4.977055911121846e-05, "loss": 0.3497, "step": 38, "task_loss": 0.3842719793319702 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26390278339385986, "epoch": 0.03, "learning_rate": 4.9764521193092624e-05, "loss": 0.3053, "step": 39, "task_loss": 0.5854817032814026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24932250380516052, "epoch": 0.03, "learning_rate": 4.975848327496679e-05, "loss": 0.3391, "step": 40, "task_loss": 1.2524149417877197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4836498498916626, "epoch": 0.03, "learning_rate": 4.9752445356840965e-05, "loss": 0.3985, "step": 41, "task_loss": 0.49639376997947693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20194953680038452, "epoch": 0.04, "learning_rate": 4.974640743871513e-05, "loss": 0.295, "step": 42, "task_loss": 0.24493812024593353 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2915697991847992, "epoch": 0.04, "learning_rate": 4.97403695205893e-05, "loss": 0.414, "step": 43, "task_loss": 1.0381807088851929 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2561498284339905, "epoch": 0.04, "learning_rate": 4.9734331602463474e-05, "loss": 0.3719, "step": 44, "task_loss": 0.3786720633506775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5230277180671692, "epoch": 0.04, "learning_rate": 4.972829368433764e-05, "loss": 0.3501, "step": 45, "task_loss": 0.7780864238739014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45299801230430603, "epoch": 0.04, "learning_rate": 4.9722255766211815e-05, "loss": 0.4428, "step": 46, "task_loss": 0.6362600326538086 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2322419285774231, "epoch": 0.04, "learning_rate": 4.971621784808598e-05, "loss": 0.3672, "step": 47, "task_loss": 0.9719992876052856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24673917889595032, "epoch": 0.04, "learning_rate": 4.9710179929960156e-05, "loss": 0.3087, "step": 48, "task_loss": 0.18985499441623688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31108900904655457, "epoch": 0.04, "learning_rate": 4.970414201183432e-05, "loss": 0.312, "step": 49, "task_loss": 0.3032427430152893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2457078993320465, "epoch": 0.04, "learning_rate": 4.969810409370849e-05, "loss": 0.2658, "step": 50, "task_loss": 0.40747126936912537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31999048590660095, "epoch": 0.04, "learning_rate": 4.9692066175582664e-05, "loss": 0.3359, "step": 51, "task_loss": 0.5623685121536255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2703372836112976, "epoch": 0.04, "learning_rate": 4.968602825745683e-05, "loss": 0.2941, "step": 52, "task_loss": 0.348588228225708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28450003266334534, "epoch": 0.04, "learning_rate": 4.9679990339331e-05, "loss": 0.2969, "step": 53, "task_loss": 0.19887912273406982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29565611481666565, "epoch": 0.05, "learning_rate": 4.967395242120517e-05, "loss": 0.3594, "step": 54, "task_loss": 0.6065662503242493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26429638266563416, "epoch": 0.05, "learning_rate": 4.966791450307934e-05, "loss": 0.3163, "step": 55, "task_loss": 1.2679150104522705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23068846762180328, "epoch": 0.05, "learning_rate": 4.966187658495351e-05, "loss": 0.3525, "step": 56, "task_loss": 0.7505380511283875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20673078298568726, "epoch": 0.05, "learning_rate": 4.965583866682768e-05, "loss": 0.3205, "step": 57, "task_loss": 0.5095474123954773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5032368898391724, "epoch": 0.05, "learning_rate": 4.9649800748701855e-05, "loss": 0.4566, "step": 58, "task_loss": 0.35173001885414124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20861366391181946, "epoch": 0.05, "learning_rate": 4.9643762830576015e-05, "loss": 0.267, "step": 59, "task_loss": 0.1368056684732437 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28144073486328125, "epoch": 0.05, "learning_rate": 4.963772491245019e-05, "loss": 0.3099, "step": 60, "task_loss": 0.249720498919487 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3778335452079773, "epoch": 0.05, "learning_rate": 4.963168699432436e-05, "loss": 0.3294, "step": 61, "task_loss": 0.7302160859107971 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2934783697128296, "epoch": 0.05, "learning_rate": 4.962564907619853e-05, "loss": 0.3507, "step": 62, "task_loss": 0.9726874232292175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3533038794994354, "epoch": 0.05, "learning_rate": 4.96196111580727e-05, "loss": 0.3099, "step": 63, "task_loss": 0.6883425712585449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47766798734664917, "epoch": 0.05, "learning_rate": 4.961357323994687e-05, "loss": 0.4069, "step": 64, "task_loss": 0.628416121006012 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.13898852467536926, "epoch": 0.05, "learning_rate": 4.960753532182104e-05, "loss": 0.2574, "step": 65, "task_loss": 0.3677407205104828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2932609021663666, "epoch": 0.06, "learning_rate": 4.9601497403695206e-05, "loss": 0.3555, "step": 66, "task_loss": 0.8631270527839661 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2619141936302185, "epoch": 0.06, "learning_rate": 4.959545948556938e-05, "loss": 0.2706, "step": 67, "task_loss": 0.7948020100593567 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32445028424263, "epoch": 0.06, "learning_rate": 4.958942156744355e-05, "loss": 0.3153, "step": 68, "task_loss": 0.17519311606884003 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37282195687294006, "epoch": 0.06, "learning_rate": 4.9583383649317714e-05, "loss": 0.2846, "step": 69, "task_loss": 0.6995919346809387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48085302114486694, "epoch": 0.06, "learning_rate": 4.957734573119189e-05, "loss": 0.4272, "step": 70, "task_loss": 0.7737849950790405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39250123500823975, "epoch": 0.06, "learning_rate": 4.957130781306606e-05, "loss": 0.3995, "step": 71, "task_loss": 1.084855079650879 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3200048804283142, "epoch": 0.06, "learning_rate": 4.956526989494022e-05, "loss": 0.3879, "step": 72, "task_loss": 0.4472369849681854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29730963706970215, "epoch": 0.06, "learning_rate": 4.9559231976814396e-05, "loss": 0.32, "step": 73, "task_loss": 0.5673580765724182 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41329067945480347, "epoch": 0.06, "learning_rate": 4.955319405868857e-05, "loss": 0.3976, "step": 74, "task_loss": 0.7128048539161682 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5558891892433167, "epoch": 0.06, "learning_rate": 4.954715614056273e-05, "loss": 0.3503, "step": 75, "task_loss": 1.0567996501922607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24657243490219116, "epoch": 0.06, "learning_rate": 4.9541118222436905e-05, "loss": 0.5436, "step": 76, "task_loss": 0.5225351452827454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17418046295642853, "epoch": 0.07, "learning_rate": 4.953508030431108e-05, "loss": 0.3026, "step": 77, "task_loss": 0.09465155750513077 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3253038823604584, "epoch": 0.07, "learning_rate": 4.9529042386185246e-05, "loss": 0.3254, "step": 78, "task_loss": 0.8527747988700867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2609974443912506, "epoch": 0.07, "learning_rate": 4.952300446805941e-05, "loss": 0.341, "step": 79, "task_loss": 0.34290611743927 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6192164421081543, "epoch": 0.07, "learning_rate": 4.951696654993359e-05, "loss": 0.4046, "step": 80, "task_loss": 0.3117198944091797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3827619254589081, "epoch": 0.07, "learning_rate": 4.9510928631807754e-05, "loss": 0.3605, "step": 81, "task_loss": 1.5429582595825195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24813801050186157, "epoch": 0.07, "learning_rate": 4.950489071368192e-05, "loss": 0.276, "step": 82, "task_loss": 0.5928993821144104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3891957998275757, "epoch": 0.07, "learning_rate": 4.9498852795556095e-05, "loss": 0.2779, "step": 83, "task_loss": 0.3394239544868469 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32362642884254456, "epoch": 0.07, "learning_rate": 4.949281487743026e-05, "loss": 0.3733, "step": 84, "task_loss": 0.4562441408634186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2390662133693695, "epoch": 0.07, "learning_rate": 4.948677695930443e-05, "loss": 0.3067, "step": 85, "task_loss": 1.2519537210464478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2174815535545349, "epoch": 0.07, "learning_rate": 4.9480739041178604e-05, "loss": 0.4703, "step": 86, "task_loss": 0.22248567640781403 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48957890272140503, "epoch": 0.07, "learning_rate": 4.947470112305278e-05, "loss": 0.434, "step": 87, "task_loss": 0.9494900703430176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2942778468132019, "epoch": 0.07, "learning_rate": 4.9468663204926945e-05, "loss": 0.2396, "step": 88, "task_loss": 0.5726677179336548 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2033950835466385, "epoch": 0.08, "learning_rate": 4.946262528680111e-05, "loss": 0.2663, "step": 89, "task_loss": 0.22979408502578735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21911516785621643, "epoch": 0.08, "learning_rate": 4.9456587368675286e-05, "loss": 0.3303, "step": 90, "task_loss": 0.4464656710624695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.548670768737793, "epoch": 0.08, "learning_rate": 4.945054945054945e-05, "loss": 0.3237, "step": 91, "task_loss": 0.39537152647972107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.520744800567627, "epoch": 0.08, "learning_rate": 4.944451153242362e-05, "loss": 0.402, "step": 92, "task_loss": 0.5880875587463379 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27351728081703186, "epoch": 0.08, "learning_rate": 4.9438473614297794e-05, "loss": 0.5001, "step": 93, "task_loss": 1.006089448928833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4162762463092804, "epoch": 0.08, "learning_rate": 4.943243569617196e-05, "loss": 0.333, "step": 94, "task_loss": 0.38947728276252747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6660782098770142, "epoch": 0.08, "learning_rate": 4.942639777804613e-05, "loss": 0.4537, "step": 95, "task_loss": 0.8405843377113342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2645527720451355, "epoch": 0.08, "learning_rate": 4.94203598599203e-05, "loss": 0.3795, "step": 96, "task_loss": 1.5186443328857422 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33359208703041077, "epoch": 0.08, "learning_rate": 4.941432194179447e-05, "loss": 0.3062, "step": 97, "task_loss": 0.712734580039978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3495367765426636, "epoch": 0.08, "learning_rate": 4.9408284023668644e-05, "loss": 0.3617, "step": 98, "task_loss": 0.8323235511779785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15926022827625275, "epoch": 0.08, "learning_rate": 4.940224610554281e-05, "loss": 0.295, "step": 99, "task_loss": 0.4674801230430603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5407546758651733, "epoch": 0.08, "learning_rate": 4.939620818741698e-05, "loss": 0.4817, "step": 100, "task_loss": 1.2738523483276367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32479119300842285, "epoch": 0.09, "learning_rate": 4.939017026929115e-05, "loss": 0.3333, "step": 101, "task_loss": 0.23474960029125214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2216508984565735, "epoch": 0.09, "learning_rate": 4.938413235116532e-05, "loss": 0.356, "step": 102, "task_loss": 0.9930310845375061 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4806128740310669, "epoch": 0.09, "learning_rate": 4.937809443303949e-05, "loss": 0.4969, "step": 103, "task_loss": 1.0667014122009277 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47363537549972534, "epoch": 0.09, "learning_rate": 4.937205651491366e-05, "loss": 0.4173, "step": 104, "task_loss": 1.3374643325805664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5180001258850098, "epoch": 0.09, "learning_rate": 4.936601859678783e-05, "loss": 0.3256, "step": 105, "task_loss": 0.9311383366584778 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17958365380764008, "epoch": 0.09, "learning_rate": 4.9359980678662e-05, "loss": 0.4124, "step": 106, "task_loss": 0.5176780819892883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27220118045806885, "epoch": 0.09, "learning_rate": 4.935394276053617e-05, "loss": 0.4031, "step": 107, "task_loss": 0.8968584537506104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23908105492591858, "epoch": 0.09, "learning_rate": 4.934790484241034e-05, "loss": 0.3409, "step": 108, "task_loss": 0.1722966432571411 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5481657385826111, "epoch": 0.09, "learning_rate": 4.934186692428451e-05, "loss": 0.3517, "step": 109, "task_loss": 0.40819215774536133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21406511962413788, "epoch": 0.09, "learning_rate": 4.933582900615868e-05, "loss": 0.3994, "step": 110, "task_loss": 0.18322262167930603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33389830589294434, "epoch": 0.09, "learning_rate": 4.932979108803285e-05, "loss": 0.4166, "step": 111, "task_loss": 0.8832553625106812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22076596319675446, "epoch": 0.09, "learning_rate": 4.932375316990702e-05, "loss": 0.3236, "step": 112, "task_loss": 0.03128660097718239 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36936795711517334, "epoch": 0.1, "learning_rate": 4.9317715251781185e-05, "loss": 0.3175, "step": 113, "task_loss": 0.757693886756897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23552142083644867, "epoch": 0.1, "learning_rate": 4.931167733365536e-05, "loss": 0.3213, "step": 114, "task_loss": 0.37792858481407166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.415870726108551, "epoch": 0.1, "learning_rate": 4.9305639415529527e-05, "loss": 0.3616, "step": 115, "task_loss": 0.39920058846473694 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21843309700489044, "epoch": 0.1, "learning_rate": 4.9299601497403694e-05, "loss": 0.342, "step": 116, "task_loss": 0.5047226548194885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5091196894645691, "epoch": 0.1, "learning_rate": 4.929356357927787e-05, "loss": 0.3985, "step": 117, "task_loss": 0.7317909002304077 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5024101734161377, "epoch": 0.1, "learning_rate": 4.928752566115204e-05, "loss": 0.3924, "step": 118, "task_loss": 0.5542799830436707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24388448894023895, "epoch": 0.1, "learning_rate": 4.928148774302621e-05, "loss": 0.4085, "step": 119, "task_loss": 0.29891934990882874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4093080163002014, "epoch": 0.1, "learning_rate": 4.9275449824900376e-05, "loss": 0.5379, "step": 120, "task_loss": 1.0453921556472778 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5290612578392029, "epoch": 0.1, "learning_rate": 4.926941190677455e-05, "loss": 0.4721, "step": 121, "task_loss": 0.29127857089042664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6379303336143494, "epoch": 0.1, "learning_rate": 4.926337398864872e-05, "loss": 0.3908, "step": 122, "task_loss": 0.5893012285232544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20345769822597504, "epoch": 0.1, "learning_rate": 4.9257336070522884e-05, "loss": 0.2864, "step": 123, "task_loss": 0.8678903579711914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4842088222503662, "epoch": 0.1, "learning_rate": 4.925129815239706e-05, "loss": 0.3455, "step": 124, "task_loss": 0.6765943765640259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8208075761795044, "epoch": 0.11, "learning_rate": 4.9245260234271226e-05, "loss": 0.4262, "step": 125, "task_loss": 0.46901002526283264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.251159131526947, "epoch": 0.11, "learning_rate": 4.923922231614539e-05, "loss": 0.2871, "step": 126, "task_loss": 1.2856820821762085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3683735728263855, "epoch": 0.11, "learning_rate": 4.923318439801957e-05, "loss": 0.3587, "step": 127, "task_loss": 0.5024790167808533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2632887065410614, "epoch": 0.11, "learning_rate": 4.922714647989374e-05, "loss": 0.413, "step": 128, "task_loss": 2.067840814590454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4994406998157501, "epoch": 0.11, "learning_rate": 4.92211085617679e-05, "loss": 0.4634, "step": 129, "task_loss": 1.5496513843536377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7655148506164551, "epoch": 0.11, "learning_rate": 4.9215070643642075e-05, "loss": 0.3568, "step": 130, "task_loss": 0.38475537300109863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24943795800209045, "epoch": 0.11, "learning_rate": 4.920903272551625e-05, "loss": 0.3271, "step": 131, "task_loss": 1.4404325485229492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24281589686870575, "epoch": 0.11, "learning_rate": 4.920299480739041e-05, "loss": 0.2401, "step": 132, "task_loss": 0.1107693538069725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33831462264060974, "epoch": 0.11, "learning_rate": 4.919695688926458e-05, "loss": 0.3988, "step": 133, "task_loss": 0.9888232350349426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34890276193618774, "epoch": 0.11, "learning_rate": 4.919091897113876e-05, "loss": 0.4251, "step": 134, "task_loss": 1.5109165906906128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15280941128730774, "epoch": 0.11, "learning_rate": 4.9184881053012924e-05, "loss": 0.2274, "step": 135, "task_loss": 0.4075343906879425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.467395544052124, "epoch": 0.11, "learning_rate": 4.917884313488709e-05, "loss": 0.3395, "step": 136, "task_loss": 0.7098667621612549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2536891996860504, "epoch": 0.12, "learning_rate": 4.9172805216761266e-05, "loss": 0.3614, "step": 137, "task_loss": 0.09199410676956177 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.186837837100029, "epoch": 0.12, "learning_rate": 4.916676729863543e-05, "loss": 0.3514, "step": 138, "task_loss": 0.7203828692436218 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20152251422405243, "epoch": 0.12, "learning_rate": 4.91607293805096e-05, "loss": 0.281, "step": 139, "task_loss": 0.06725029647350311 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4330352246761322, "epoch": 0.12, "learning_rate": 4.9154691462383774e-05, "loss": 0.2649, "step": 140, "task_loss": 0.7574622631072998 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22957520186901093, "epoch": 0.12, "learning_rate": 4.914865354425794e-05, "loss": 0.3723, "step": 141, "task_loss": 0.7737582325935364 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3494449853897095, "epoch": 0.12, "learning_rate": 4.914261562613211e-05, "loss": 0.3457, "step": 142, "task_loss": 1.0415617227554321 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30456164479255676, "epoch": 0.12, "learning_rate": 4.913657770800628e-05, "loss": 0.312, "step": 143, "task_loss": 1.8037948608398438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2190721482038498, "epoch": 0.12, "learning_rate": 4.9130539789880456e-05, "loss": 0.2779, "step": 144, "task_loss": 1.1289736032485962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28175005316734314, "epoch": 0.12, "learning_rate": 4.912450187175462e-05, "loss": 0.3231, "step": 145, "task_loss": 0.5026500225067139 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5755388736724854, "epoch": 0.12, "learning_rate": 4.911846395362879e-05, "loss": 0.3759, "step": 146, "task_loss": 1.3843616247177124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34669309854507446, "epoch": 0.12, "learning_rate": 4.9112426035502965e-05, "loss": 0.3986, "step": 147, "task_loss": 0.44473525881767273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2831808030605316, "epoch": 0.13, "learning_rate": 4.910638811737713e-05, "loss": 0.3167, "step": 148, "task_loss": 0.9988532066345215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17373451590538025, "epoch": 0.13, "learning_rate": 4.91003501992513e-05, "loss": 0.3903, "step": 149, "task_loss": 0.3376629054546356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5232499837875366, "epoch": 0.13, "learning_rate": 4.909431228112547e-05, "loss": 0.4094, "step": 150, "task_loss": 0.5718556046485901 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40484216809272766, "epoch": 0.13, "learning_rate": 4.908827436299964e-05, "loss": 0.5119, "step": 151, "task_loss": 1.023551106452942 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3534274697303772, "epoch": 0.13, "learning_rate": 4.908223644487381e-05, "loss": 0.4093, "step": 152, "task_loss": 0.25515076518058777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.542445957660675, "epoch": 0.13, "learning_rate": 4.907619852674798e-05, "loss": 0.5587, "step": 153, "task_loss": 0.6882498264312744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4814941883087158, "epoch": 0.13, "learning_rate": 4.907016060862215e-05, "loss": 0.4198, "step": 154, "task_loss": 0.43797406554222107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3490346074104309, "epoch": 0.13, "learning_rate": 4.9064122690496316e-05, "loss": 0.4359, "step": 155, "task_loss": 0.4982220232486725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35131698846817017, "epoch": 0.13, "learning_rate": 4.905808477237049e-05, "loss": 0.3519, "step": 156, "task_loss": 0.23772475123405457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4136688709259033, "epoch": 0.13, "learning_rate": 4.905204685424466e-05, "loss": 0.3497, "step": 157, "task_loss": 0.9108613133430481 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5679277777671814, "epoch": 0.13, "learning_rate": 4.904600893611883e-05, "loss": 0.5538, "step": 158, "task_loss": 0.3551519811153412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21929334104061127, "epoch": 0.13, "learning_rate": 4.9039971017993e-05, "loss": 0.3791, "step": 159, "task_loss": 0.09782926738262177 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1876523494720459, "epoch": 0.14, "learning_rate": 4.903393309986717e-05, "loss": 0.2872, "step": 160, "task_loss": 0.6902381777763367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.686616837978363, "epoch": 0.14, "learning_rate": 4.902789518174134e-05, "loss": 0.4391, "step": 161, "task_loss": 0.697905957698822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1724153757095337, "epoch": 0.14, "learning_rate": 4.9021857263615506e-05, "loss": 0.2557, "step": 162, "task_loss": 0.01864825189113617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31484732031822205, "epoch": 0.14, "learning_rate": 4.901581934548968e-05, "loss": 0.293, "step": 163, "task_loss": 0.5195794105529785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6137804985046387, "epoch": 0.14, "learning_rate": 4.900978142736385e-05, "loss": 0.4083, "step": 164, "task_loss": 1.0109091997146606 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5426200032234192, "epoch": 0.14, "learning_rate": 4.9003743509238014e-05, "loss": 0.3669, "step": 165, "task_loss": 0.7892276048660278 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3899754285812378, "epoch": 0.14, "learning_rate": 4.899770559111219e-05, "loss": 0.4875, "step": 166, "task_loss": 0.8190497756004333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38025766611099243, "epoch": 0.14, "learning_rate": 4.8991667672986356e-05, "loss": 0.3498, "step": 167, "task_loss": 1.5057487487792969 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22788751125335693, "epoch": 0.14, "learning_rate": 4.898562975486053e-05, "loss": 0.3909, "step": 168, "task_loss": 0.1304035186767578 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24553920328617096, "epoch": 0.14, "learning_rate": 4.89795918367347e-05, "loss": 0.2719, "step": 169, "task_loss": 0.0628340020775795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2144862711429596, "epoch": 0.14, "learning_rate": 4.8973553918608864e-05, "loss": 0.2753, "step": 170, "task_loss": 0.28159356117248535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37090346217155457, "epoch": 0.14, "learning_rate": 4.896751600048304e-05, "loss": 0.3093, "step": 171, "task_loss": 0.6044325828552246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.283582478761673, "epoch": 0.15, "learning_rate": 4.8961478082357205e-05, "loss": 0.2962, "step": 172, "task_loss": 0.54231196641922 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3424731492996216, "epoch": 0.15, "learning_rate": 4.895544016423137e-05, "loss": 0.4156, "step": 173, "task_loss": 0.4681467115879059 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3768496513366699, "epoch": 0.15, "learning_rate": 4.8949402246105546e-05, "loss": 0.3797, "step": 174, "task_loss": 1.2841796875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.358032763004303, "epoch": 0.15, "learning_rate": 4.8943364327979713e-05, "loss": 0.3096, "step": 175, "task_loss": 0.5068651437759399 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14947356283664703, "epoch": 0.15, "learning_rate": 4.893732640985389e-05, "loss": 0.3655, "step": 176, "task_loss": 0.42544713616371155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4119400382041931, "epoch": 0.15, "learning_rate": 4.8931288491728055e-05, "loss": 0.3414, "step": 177, "task_loss": 0.5711463093757629 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2045574188232422, "epoch": 0.15, "learning_rate": 4.892525057360222e-05, "loss": 0.2565, "step": 178, "task_loss": 0.6481475234031677 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24270205199718475, "epoch": 0.15, "learning_rate": 4.8919212655476396e-05, "loss": 0.3202, "step": 179, "task_loss": 0.3328099250793457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32103991508483887, "epoch": 0.15, "learning_rate": 4.891317473735056e-05, "loss": 0.3954, "step": 180, "task_loss": 0.5921569466590881 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3563228249549866, "epoch": 0.15, "learning_rate": 4.890713681922474e-05, "loss": 0.3808, "step": 181, "task_loss": 0.45374301075935364 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31121641397476196, "epoch": 0.15, "learning_rate": 4.8901098901098904e-05, "loss": 0.3482, "step": 182, "task_loss": 0.963464081287384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2124629020690918, "epoch": 0.15, "learning_rate": 4.889506098297307e-05, "loss": 0.3355, "step": 183, "task_loss": 0.6052290797233582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22625645995140076, "epoch": 0.16, "learning_rate": 4.8889023064847245e-05, "loss": 0.392, "step": 184, "task_loss": 1.454310655593872 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3684975802898407, "epoch": 0.16, "learning_rate": 4.888298514672141e-05, "loss": 0.5371, "step": 185, "task_loss": 0.7324237823486328 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2952372431755066, "epoch": 0.16, "learning_rate": 4.887694722859558e-05, "loss": 0.4193, "step": 186, "task_loss": 1.0562167167663574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15316781401634216, "epoch": 0.16, "learning_rate": 4.8870909310469754e-05, "loss": 0.2822, "step": 187, "task_loss": 0.32499364018440247 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36386430263519287, "epoch": 0.16, "learning_rate": 4.886487139234392e-05, "loss": 0.4516, "step": 188, "task_loss": 0.45146071910858154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47371023893356323, "epoch": 0.16, "learning_rate": 4.885883347421809e-05, "loss": 0.3769, "step": 189, "task_loss": 1.1611276865005493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2361714243888855, "epoch": 0.16, "learning_rate": 4.885279555609226e-05, "loss": 0.2963, "step": 190, "task_loss": 0.6114784479141235 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.11003533750772476, "epoch": 0.16, "learning_rate": 4.8846757637966436e-05, "loss": 0.2562, "step": 191, "task_loss": 0.03457775339484215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46895644068717957, "epoch": 0.16, "learning_rate": 4.88407197198406e-05, "loss": 0.3306, "step": 192, "task_loss": 0.7334683537483215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3926951289176941, "epoch": 0.16, "learning_rate": 4.883468180171477e-05, "loss": 0.4085, "step": 193, "task_loss": 0.44728636741638184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.487906813621521, "epoch": 0.16, "learning_rate": 4.8828643883588944e-05, "loss": 0.3473, "step": 194, "task_loss": 0.5576806664466858 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.500954806804657, "epoch": 0.16, "learning_rate": 4.882260596546311e-05, "loss": 0.3527, "step": 195, "task_loss": 0.5573667883872986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23157338798046112, "epoch": 0.17, "learning_rate": 4.881656804733728e-05, "loss": 0.3341, "step": 196, "task_loss": 0.7897164821624756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5552976727485657, "epoch": 0.17, "learning_rate": 4.881053012921145e-05, "loss": 0.3655, "step": 197, "task_loss": 0.5584388375282288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4161600172519684, "epoch": 0.17, "learning_rate": 4.880449221108562e-05, "loss": 0.452, "step": 198, "task_loss": 0.5745552778244019 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3097490668296814, "epoch": 0.17, "learning_rate": 4.879845429295979e-05, "loss": 0.3593, "step": 199, "task_loss": 0.6984278559684753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49976080656051636, "epoch": 0.17, "learning_rate": 4.879241637483396e-05, "loss": 0.4618, "step": 200, "task_loss": 1.4376177787780762 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42486974596977234, "epoch": 0.17, "learning_rate": 4.8786378456708135e-05, "loss": 0.2704, "step": 201, "task_loss": 0.7904959917068481 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3746505677700043, "epoch": 0.17, "learning_rate": 4.8780340538582295e-05, "loss": 0.3793, "step": 202, "task_loss": 0.581667959690094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15201419591903687, "epoch": 0.17, "learning_rate": 4.877430262045647e-05, "loss": 0.3495, "step": 203, "task_loss": 0.2156621366739273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20418617129325867, "epoch": 0.17, "learning_rate": 4.876826470233064e-05, "loss": 0.3675, "step": 204, "task_loss": 0.4504316747188568 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6943567991256714, "epoch": 0.17, "learning_rate": 4.8762226784204803e-05, "loss": 0.3854, "step": 205, "task_loss": 0.22673046588897705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19938820600509644, "epoch": 0.17, "learning_rate": 4.875618886607898e-05, "loss": 0.392, "step": 206, "task_loss": 0.21580791473388672 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23300912976264954, "epoch": 0.17, "learning_rate": 4.875015094795315e-05, "loss": 0.3447, "step": 207, "task_loss": 0.7670917510986328 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23599499464035034, "epoch": 0.18, "learning_rate": 4.874411302982732e-05, "loss": 0.3106, "step": 208, "task_loss": 0.8528785705566406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8274899125099182, "epoch": 0.18, "learning_rate": 4.8738075111701486e-05, "loss": 0.425, "step": 209, "task_loss": 0.9706495404243469 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2531846761703491, "epoch": 0.18, "learning_rate": 4.873203719357566e-05, "loss": 0.259, "step": 210, "task_loss": 0.39630326628685 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19299283623695374, "epoch": 0.18, "learning_rate": 4.872599927544983e-05, "loss": 0.3502, "step": 211, "task_loss": 0.3861299753189087 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2091982513666153, "epoch": 0.18, "learning_rate": 4.8719961357323994e-05, "loss": 0.301, "step": 212, "task_loss": 0.6071786880493164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26235446333885193, "epoch": 0.18, "learning_rate": 4.871392343919817e-05, "loss": 0.2982, "step": 213, "task_loss": 0.0860738679766655 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23299503326416016, "epoch": 0.18, "learning_rate": 4.8707885521072335e-05, "loss": 0.3838, "step": 214, "task_loss": 1.4147411584854126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33884522318840027, "epoch": 0.18, "learning_rate": 4.87018476029465e-05, "loss": 0.3378, "step": 215, "task_loss": 0.6194642186164856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4095205068588257, "epoch": 0.18, "learning_rate": 4.8695809684820676e-05, "loss": 0.2675, "step": 216, "task_loss": 1.0903899669647217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34026312828063965, "epoch": 0.18, "learning_rate": 4.868977176669485e-05, "loss": 0.4538, "step": 217, "task_loss": 1.1433500051498413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4270251989364624, "epoch": 0.18, "learning_rate": 4.868373384856901e-05, "loss": 0.3852, "step": 218, "task_loss": 0.5341470837593079 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27537986636161804, "epoch": 0.19, "learning_rate": 4.8677695930443185e-05, "loss": 0.4107, "step": 219, "task_loss": 0.5932626724243164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26450324058532715, "epoch": 0.19, "learning_rate": 4.867165801231736e-05, "loss": 0.343, "step": 220, "task_loss": 0.7140247225761414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31506139039993286, "epoch": 0.19, "learning_rate": 4.8665620094191526e-05, "loss": 0.2773, "step": 221, "task_loss": 0.29893991351127625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2811909019947052, "epoch": 0.19, "learning_rate": 4.865958217606569e-05, "loss": 0.3319, "step": 222, "task_loss": 1.3602136373519897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5581226944923401, "epoch": 0.19, "learning_rate": 4.865354425793987e-05, "loss": 0.4197, "step": 223, "task_loss": 0.6683071255683899 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31096911430358887, "epoch": 0.19, "learning_rate": 4.8647506339814034e-05, "loss": 0.3428, "step": 224, "task_loss": 0.6044328808784485 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2684841752052307, "epoch": 0.19, "learning_rate": 4.86414684216882e-05, "loss": 0.3328, "step": 225, "task_loss": 0.5970885753631592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20033758878707886, "epoch": 0.19, "learning_rate": 4.8635430503562375e-05, "loss": 0.3128, "step": 226, "task_loss": 0.2964940667152405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2790672183036804, "epoch": 0.19, "learning_rate": 4.862939258543654e-05, "loss": 0.331, "step": 227, "task_loss": 0.4587661027908325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40140119194984436, "epoch": 0.19, "learning_rate": 4.862335466731071e-05, "loss": 0.4773, "step": 228, "task_loss": 1.3390758037567139 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5898053050041199, "epoch": 0.19, "learning_rate": 4.8617316749184884e-05, "loss": 0.484, "step": 229, "task_loss": 1.6657185554504395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.363012433052063, "epoch": 0.19, "learning_rate": 4.861127883105905e-05, "loss": 0.3168, "step": 230, "task_loss": 0.8613802194595337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24187681078910828, "epoch": 0.2, "learning_rate": 4.8605240912933225e-05, "loss": 0.2809, "step": 231, "task_loss": 0.8444315791130066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21894003450870514, "epoch": 0.2, "learning_rate": 4.859920299480739e-05, "loss": 0.3651, "step": 232, "task_loss": 0.8712193369865417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4181317389011383, "epoch": 0.2, "learning_rate": 4.8593165076681566e-05, "loss": 0.2804, "step": 233, "task_loss": 0.5653902292251587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27538320422172546, "epoch": 0.2, "learning_rate": 4.858712715855573e-05, "loss": 0.3669, "step": 234, "task_loss": 0.3681375980377197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28899216651916504, "epoch": 0.2, "learning_rate": 4.85810892404299e-05, "loss": 0.3217, "step": 235, "task_loss": 0.7067887187004089 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15619833767414093, "epoch": 0.2, "learning_rate": 4.8575051322304074e-05, "loss": 0.2765, "step": 236, "task_loss": 0.33059683442115784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3716956377029419, "epoch": 0.2, "learning_rate": 4.856901340417824e-05, "loss": 0.3955, "step": 237, "task_loss": 1.5424350500106812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20977318286895752, "epoch": 0.2, "learning_rate": 4.856297548605241e-05, "loss": 0.489, "step": 238, "task_loss": 1.061920166015625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38254275918006897, "epoch": 0.2, "learning_rate": 4.855693756792658e-05, "loss": 0.3987, "step": 239, "task_loss": 0.6773325204849243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16488344967365265, "epoch": 0.2, "learning_rate": 4.855089964980075e-05, "loss": 0.2558, "step": 240, "task_loss": 0.23429742455482483 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5256952047348022, "epoch": 0.2, "learning_rate": 4.8544861731674924e-05, "loss": 0.3292, "step": 241, "task_loss": 0.414902001619339 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3921819031238556, "epoch": 0.2, "learning_rate": 4.853882381354909e-05, "loss": 0.4092, "step": 242, "task_loss": 0.3879691958427429 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43839550018310547, "epoch": 0.21, "learning_rate": 4.853278589542326e-05, "loss": 0.406, "step": 243, "task_loss": 0.3458399474620819 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5594123005867004, "epoch": 0.21, "learning_rate": 4.852674797729743e-05, "loss": 0.35, "step": 244, "task_loss": 0.7361867427825928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2727014124393463, "epoch": 0.21, "learning_rate": 4.85207100591716e-05, "loss": 0.3929, "step": 245, "task_loss": 0.22650966048240662 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24511823058128357, "epoch": 0.21, "learning_rate": 4.8514672141045766e-05, "loss": 0.3514, "step": 246, "task_loss": 0.5117925405502319 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23608553409576416, "epoch": 0.21, "learning_rate": 4.850863422291994e-05, "loss": 0.2926, "step": 247, "task_loss": 0.562814474105835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28395915031433105, "epoch": 0.21, "learning_rate": 4.850259630479411e-05, "loss": 0.3661, "step": 248, "task_loss": 0.21349310874938965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6520936489105225, "epoch": 0.21, "learning_rate": 4.849655838666828e-05, "loss": 0.425, "step": 249, "task_loss": 0.5300475358963013 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5111593008041382, "epoch": 0.21, "learning_rate": 4.849052046854245e-05, "loss": 0.3344, "step": 250, "task_loss": 0.2857605218887329 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4802568256855011, "epoch": 0.21, "learning_rate": 4.848448255041662e-05, "loss": 0.3503, "step": 251, "task_loss": 1.1864210367202759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30522099137306213, "epoch": 0.21, "learning_rate": 4.847844463229079e-05, "loss": 0.4114, "step": 252, "task_loss": 0.7442456483840942 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2635870575904846, "epoch": 0.21, "learning_rate": 4.847240671416496e-05, "loss": 0.4497, "step": 253, "task_loss": 0.6402062773704529 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.352478563785553, "epoch": 0.21, "learning_rate": 4.846636879603913e-05, "loss": 0.2767, "step": 254, "task_loss": 0.9297935962677002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4081909656524658, "epoch": 0.22, "learning_rate": 4.84603308779133e-05, "loss": 0.321, "step": 255, "task_loss": 0.6388130784034729 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3642035722732544, "epoch": 0.22, "learning_rate": 4.8454292959787465e-05, "loss": 0.3386, "step": 256, "task_loss": 0.8731583952903748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24019326269626617, "epoch": 0.22, "learning_rate": 4.844825504166164e-05, "loss": 0.3104, "step": 257, "task_loss": 0.5203094482421875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29272475838661194, "epoch": 0.22, "learning_rate": 4.8442217123535806e-05, "loss": 0.4279, "step": 258, "task_loss": 0.7139970660209656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4900217652320862, "epoch": 0.22, "learning_rate": 4.8436179205409974e-05, "loss": 0.3434, "step": 259, "task_loss": 1.206527590751648 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23717987537384033, "epoch": 0.22, "learning_rate": 4.843014128728415e-05, "loss": 0.2648, "step": 260, "task_loss": 0.1249101385474205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.194607675075531, "epoch": 0.22, "learning_rate": 4.842410336915832e-05, "loss": 0.3232, "step": 261, "task_loss": 1.1592941284179688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17562155425548553, "epoch": 0.22, "learning_rate": 4.841806545103248e-05, "loss": 0.3056, "step": 262, "task_loss": 0.4430987536907196 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.393974244594574, "epoch": 0.22, "learning_rate": 4.8412027532906656e-05, "loss": 0.3759, "step": 263, "task_loss": 0.6270527839660645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1993233561515808, "epoch": 0.22, "learning_rate": 4.840598961478083e-05, "loss": 0.2674, "step": 264, "task_loss": 0.3872101604938507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.430284321308136, "epoch": 0.22, "learning_rate": 4.8399951696655e-05, "loss": 0.3473, "step": 265, "task_loss": 0.8067490458488464 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17175956070423126, "epoch": 0.22, "learning_rate": 4.8393913778529164e-05, "loss": 0.3197, "step": 266, "task_loss": 0.16316160559654236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34469112753868103, "epoch": 0.23, "learning_rate": 4.838787586040334e-05, "loss": 0.3742, "step": 267, "task_loss": 0.929304301738739 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31713566184043884, "epoch": 0.23, "learning_rate": 4.8381837942277505e-05, "loss": 0.3821, "step": 268, "task_loss": 0.6186680197715759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6471445560455322, "epoch": 0.23, "learning_rate": 4.837580002415167e-05, "loss": 0.442, "step": 269, "task_loss": 0.6571251153945923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29757431149482727, "epoch": 0.23, "learning_rate": 4.8369762106025847e-05, "loss": 0.3307, "step": 270, "task_loss": 0.21978020668029785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.354866623878479, "epoch": 0.23, "learning_rate": 4.8363724187900014e-05, "loss": 0.311, "step": 271, "task_loss": 0.9863007068634033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1809060275554657, "epoch": 0.23, "learning_rate": 4.835768626977418e-05, "loss": 0.4201, "step": 272, "task_loss": 0.1380172073841095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2241010069847107, "epoch": 0.23, "learning_rate": 4.8351648351648355e-05, "loss": 0.335, "step": 273, "task_loss": 0.12595102190971375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19005972146987915, "epoch": 0.23, "learning_rate": 4.834561043352253e-05, "loss": 0.2755, "step": 274, "task_loss": 0.24514470994472504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20019391179084778, "epoch": 0.23, "learning_rate": 4.833957251539669e-05, "loss": 0.2854, "step": 275, "task_loss": 0.025080382823944092 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.489296555519104, "epoch": 0.23, "learning_rate": 4.833353459727086e-05, "loss": 0.3756, "step": 276, "task_loss": 0.3847203552722931 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2391885221004486, "epoch": 0.23, "learning_rate": 4.832749667914504e-05, "loss": 0.2948, "step": 277, "task_loss": 0.1924915760755539 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3371428847312927, "epoch": 0.23, "learning_rate": 4.83214587610192e-05, "loss": 0.2761, "step": 278, "task_loss": 0.29785943031311035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31375688314437866, "epoch": 0.24, "learning_rate": 4.831542084289337e-05, "loss": 0.3084, "step": 279, "task_loss": 0.5493555068969727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3100293278694153, "epoch": 0.24, "learning_rate": 4.8309382924767545e-05, "loss": 0.2934, "step": 280, "task_loss": 0.5424577593803406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2426050752401352, "epoch": 0.24, "learning_rate": 4.830334500664171e-05, "loss": 0.3874, "step": 281, "task_loss": 0.7884585857391357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36966627836227417, "epoch": 0.24, "learning_rate": 4.829730708851588e-05, "loss": 0.3573, "step": 282, "task_loss": 0.8238126039505005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24911276996135712, "epoch": 0.24, "learning_rate": 4.8291269170390054e-05, "loss": 0.4464, "step": 283, "task_loss": 1.1340264081954956 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39401501417160034, "epoch": 0.24, "learning_rate": 4.828523125226422e-05, "loss": 0.3437, "step": 284, "task_loss": 0.6530060172080994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23328270018100739, "epoch": 0.24, "learning_rate": 4.827919333413839e-05, "loss": 0.3357, "step": 285, "task_loss": 0.5647006630897522 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16991066932678223, "epoch": 0.24, "learning_rate": 4.827315541601256e-05, "loss": 0.3179, "step": 286, "task_loss": 0.32162773609161377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3763618469238281, "epoch": 0.24, "learning_rate": 4.826711749788673e-05, "loss": 0.3541, "step": 287, "task_loss": 0.4727748930454254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6950287818908691, "epoch": 0.24, "learning_rate": 4.8261079579760896e-05, "loss": 0.423, "step": 288, "task_loss": 0.24831871688365936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43147361278533936, "epoch": 0.24, "learning_rate": 4.825504166163507e-05, "loss": 0.3197, "step": 289, "task_loss": 1.5783672332763672 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18680858612060547, "epoch": 0.24, "learning_rate": 4.8249003743509244e-05, "loss": 0.2997, "step": 290, "task_loss": 0.8671808242797852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4599849581718445, "epoch": 0.25, "learning_rate": 4.824296582538341e-05, "loss": 0.4107, "step": 291, "task_loss": 0.3785121738910675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.10759762674570084, "epoch": 0.25, "learning_rate": 4.823692790725758e-05, "loss": 0.308, "step": 292, "task_loss": 0.27092301845550537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.567275881767273, "epoch": 0.25, "learning_rate": 4.823088998913175e-05, "loss": 0.4305, "step": 293, "task_loss": 0.45297971367836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3822139501571655, "epoch": 0.25, "learning_rate": 4.822485207100592e-05, "loss": 0.3889, "step": 294, "task_loss": 0.4468007981777191 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18324688076972961, "epoch": 0.25, "learning_rate": 4.821881415288009e-05, "loss": 0.2909, "step": 295, "task_loss": 0.2566443383693695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.560999870300293, "epoch": 0.25, "learning_rate": 4.821277623475426e-05, "loss": 0.448, "step": 296, "task_loss": 1.2885384559631348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20526620745658875, "epoch": 0.25, "learning_rate": 4.820673831662843e-05, "loss": 0.3491, "step": 297, "task_loss": 0.6789870858192444 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5073180794715881, "epoch": 0.25, "learning_rate": 4.8200700398502595e-05, "loss": 0.3963, "step": 298, "task_loss": 0.6330386400222778 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31261587142944336, "epoch": 0.25, "learning_rate": 4.819466248037677e-05, "loss": 0.373, "step": 299, "task_loss": 0.534789502620697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35550448298454285, "epoch": 0.25, "learning_rate": 4.8188624562250937e-05, "loss": 0.3637, "step": 300, "task_loss": 0.510871171951294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2786980867385864, "epoch": 0.25, "learning_rate": 4.818258664412511e-05, "loss": 0.3726, "step": 301, "task_loss": 0.14009304344654083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22258764505386353, "epoch": 0.26, "learning_rate": 4.817654872599928e-05, "loss": 0.3001, "step": 302, "task_loss": 0.538131058216095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20524820685386658, "epoch": 0.26, "learning_rate": 4.8170510807873445e-05, "loss": 0.3576, "step": 303, "task_loss": 0.4417031407356262 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42584243416786194, "epoch": 0.26, "learning_rate": 4.816447288974762e-05, "loss": 0.4253, "step": 304, "task_loss": 1.3795497417449951 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5721404552459717, "epoch": 0.26, "learning_rate": 4.8158434971621786e-05, "loss": 0.3744, "step": 305, "task_loss": 1.0504950284957886 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15696823596954346, "epoch": 0.26, "learning_rate": 4.815239705349596e-05, "loss": 0.2966, "step": 306, "task_loss": 0.08542599529027939 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2984582781791687, "epoch": 0.26, "learning_rate": 4.814635913537013e-05, "loss": 0.2839, "step": 307, "task_loss": 0.9690406918525696 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30952370166778564, "epoch": 0.26, "learning_rate": 4.8140321217244294e-05, "loss": 0.3956, "step": 308, "task_loss": 0.9119435548782349 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43029385805130005, "epoch": 0.26, "learning_rate": 4.813428329911847e-05, "loss": 0.3542, "step": 309, "task_loss": 0.428992360830307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2898123860359192, "epoch": 0.26, "learning_rate": 4.8128245380992635e-05, "loss": 0.4572, "step": 310, "task_loss": 1.1544145345687866 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19898618757724762, "epoch": 0.26, "learning_rate": 4.812220746286681e-05, "loss": 0.3843, "step": 311, "task_loss": 0.1991880089044571 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19172519445419312, "epoch": 0.26, "learning_rate": 4.811616954474098e-05, "loss": 0.2997, "step": 312, "task_loss": 0.18731540441513062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4143029451370239, "epoch": 0.26, "learning_rate": 4.8110131626615144e-05, "loss": 0.2551, "step": 313, "task_loss": 0.6104400157928467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28943347930908203, "epoch": 0.27, "learning_rate": 4.810409370848932e-05, "loss": 0.332, "step": 314, "task_loss": 0.5059872269630432 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45407694578170776, "epoch": 0.27, "learning_rate": 4.8098055790363485e-05, "loss": 0.3514, "step": 315, "task_loss": 0.8044115900993347 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2280709147453308, "epoch": 0.27, "learning_rate": 4.809201787223765e-05, "loss": 0.3424, "step": 316, "task_loss": 0.2259802222251892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4099350571632385, "epoch": 0.27, "learning_rate": 4.8085979954111826e-05, "loss": 0.4666, "step": 317, "task_loss": 0.8793275952339172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25592198967933655, "epoch": 0.27, "learning_rate": 4.807994203598599e-05, "loss": 0.3461, "step": 318, "task_loss": 0.607208788394928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4621131420135498, "epoch": 0.27, "learning_rate": 4.807390411786016e-05, "loss": 0.3901, "step": 319, "task_loss": 0.5261971354484558 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22693340480327606, "epoch": 0.27, "learning_rate": 4.8067866199734334e-05, "loss": 0.2213, "step": 320, "task_loss": 0.5978261232376099 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2422429919242859, "epoch": 0.27, "learning_rate": 4.806182828160851e-05, "loss": 0.4444, "step": 321, "task_loss": 1.0581352710723877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37081557512283325, "epoch": 0.27, "learning_rate": 4.8055790363482676e-05, "loss": 0.3709, "step": 322, "task_loss": 0.7748590707778931 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46459850668907166, "epoch": 0.27, "learning_rate": 4.804975244535684e-05, "loss": 0.3109, "step": 323, "task_loss": 1.2749043703079224 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29648324847221375, "epoch": 0.27, "learning_rate": 4.804371452723102e-05, "loss": 0.3537, "step": 324, "task_loss": 0.602401077747345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25404345989227295, "epoch": 0.27, "learning_rate": 4.8037676609105184e-05, "loss": 0.3649, "step": 325, "task_loss": 0.24259012937545776 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29637932777404785, "epoch": 0.28, "learning_rate": 4.803163869097935e-05, "loss": 0.2798, "step": 326, "task_loss": 0.5864307284355164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2804659605026245, "epoch": 0.28, "learning_rate": 4.8025600772853525e-05, "loss": 0.3357, "step": 327, "task_loss": 0.929490327835083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2018260359764099, "epoch": 0.28, "learning_rate": 4.801956285472769e-05, "loss": 0.309, "step": 328, "task_loss": 0.4979628026485443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2459574043750763, "epoch": 0.28, "learning_rate": 4.801352493660186e-05, "loss": 0.4405, "step": 329, "task_loss": 0.5364251732826233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36930930614471436, "epoch": 0.28, "learning_rate": 4.800748701847603e-05, "loss": 0.3603, "step": 330, "task_loss": 0.31747937202453613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21881195902824402, "epoch": 0.28, "learning_rate": 4.800144910035021e-05, "loss": 0.524, "step": 331, "task_loss": 0.37361156940460205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46083328127861023, "epoch": 0.28, "learning_rate": 4.799541118222437e-05, "loss": 0.3065, "step": 332, "task_loss": 0.4813331365585327 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22865751385688782, "epoch": 0.28, "learning_rate": 4.798937326409854e-05, "loss": 0.2875, "step": 333, "task_loss": 0.23592215776443481 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2872365415096283, "epoch": 0.28, "learning_rate": 4.7983335345972716e-05, "loss": 0.3447, "step": 334, "task_loss": 0.6643169522285461 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22379858791828156, "epoch": 0.28, "learning_rate": 4.7977297427846876e-05, "loss": 0.2005, "step": 335, "task_loss": 0.6081706881523132 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23591037094593048, "epoch": 0.28, "learning_rate": 4.797125950972105e-05, "loss": 0.2412, "step": 336, "task_loss": 0.13559593260288239 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2989702820777893, "epoch": 0.28, "learning_rate": 4.7965221591595224e-05, "loss": 0.3947, "step": 337, "task_loss": 0.9134337902069092 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45019423961639404, "epoch": 0.29, "learning_rate": 4.795918367346939e-05, "loss": 0.4523, "step": 338, "task_loss": 0.8208408951759338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4156774580478668, "epoch": 0.29, "learning_rate": 4.795314575534356e-05, "loss": 0.3336, "step": 339, "task_loss": 0.5028893947601318 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2671329975128174, "epoch": 0.29, "learning_rate": 4.794710783721773e-05, "loss": 0.3015, "step": 340, "task_loss": 0.9643546342849731 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4216309189796448, "epoch": 0.29, "learning_rate": 4.79410699190919e-05, "loss": 0.3539, "step": 341, "task_loss": 0.577515721321106 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2717858850955963, "epoch": 0.29, "learning_rate": 4.793503200096607e-05, "loss": 0.3857, "step": 342, "task_loss": 0.1958017647266388 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2821325957775116, "epoch": 0.29, "learning_rate": 4.792899408284024e-05, "loss": 0.3446, "step": 343, "task_loss": 0.5786783695220947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36023688316345215, "epoch": 0.29, "learning_rate": 4.792295616471441e-05, "loss": 0.3421, "step": 344, "task_loss": 1.0065206289291382 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1944040060043335, "epoch": 0.29, "learning_rate": 4.7916918246588575e-05, "loss": 0.361, "step": 345, "task_loss": 1.1140313148498535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24445584416389465, "epoch": 0.29, "learning_rate": 4.791088032846275e-05, "loss": 0.2299, "step": 346, "task_loss": 0.3781536817550659 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42969006299972534, "epoch": 0.29, "learning_rate": 4.790484241033692e-05, "loss": 0.3169, "step": 347, "task_loss": 0.8441028594970703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2303089052438736, "epoch": 0.29, "learning_rate": 4.789880449221108e-05, "loss": 0.337, "step": 348, "task_loss": 0.4934919774532318 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3386507034301758, "epoch": 0.29, "learning_rate": 4.789276657408526e-05, "loss": 0.3716, "step": 349, "task_loss": 0.054628316313028336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31714892387390137, "epoch": 0.3, "learning_rate": 4.788672865595943e-05, "loss": 0.3874, "step": 350, "task_loss": 0.1863877773284912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18160632252693176, "epoch": 0.3, "learning_rate": 4.78806907378336e-05, "loss": 0.3522, "step": 351, "task_loss": 0.5615395307540894 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32398074865341187, "epoch": 0.3, "learning_rate": 4.7874652819707766e-05, "loss": 0.3259, "step": 352, "task_loss": 0.9202311038970947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3226522207260132, "epoch": 0.3, "learning_rate": 4.786861490158194e-05, "loss": 0.354, "step": 353, "task_loss": 0.5552647709846497 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3574296236038208, "epoch": 0.3, "learning_rate": 4.786257698345611e-05, "loss": 0.3838, "step": 354, "task_loss": 0.17772305011749268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2681061625480652, "epoch": 0.3, "learning_rate": 4.7856539065330274e-05, "loss": 0.3658, "step": 355, "task_loss": 0.22847799956798553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3040708005428314, "epoch": 0.3, "learning_rate": 4.785050114720445e-05, "loss": 0.4651, "step": 356, "task_loss": 1.0607177019119263 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37687888741493225, "epoch": 0.3, "learning_rate": 4.7844463229078615e-05, "loss": 0.3122, "step": 357, "task_loss": 0.34086158871650696 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41844409704208374, "epoch": 0.3, "learning_rate": 4.783842531095278e-05, "loss": 0.3354, "step": 358, "task_loss": 1.0120818614959717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38185131549835205, "epoch": 0.3, "learning_rate": 4.7832387392826956e-05, "loss": 0.3811, "step": 359, "task_loss": 0.5875111818313599 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5939629077911377, "epoch": 0.3, "learning_rate": 4.7826349474701123e-05, "loss": 0.3744, "step": 360, "task_loss": 0.5604010820388794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34011173248291016, "epoch": 0.3, "learning_rate": 4.782031155657529e-05, "loss": 0.408, "step": 361, "task_loss": 0.9530080556869507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7397969961166382, "epoch": 0.31, "learning_rate": 4.7814273638449465e-05, "loss": 0.3892, "step": 362, "task_loss": 0.7186213731765747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.10755526274442673, "epoch": 0.31, "learning_rate": 4.780823572032364e-05, "loss": 0.3841, "step": 363, "task_loss": 0.651858925819397 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2566481828689575, "epoch": 0.31, "learning_rate": 4.7802197802197806e-05, "loss": 0.4148, "step": 364, "task_loss": 1.6260356903076172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2114250659942627, "epoch": 0.31, "learning_rate": 4.779615988407197e-05, "loss": 0.2718, "step": 365, "task_loss": 0.08021149784326553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1955362856388092, "epoch": 0.31, "learning_rate": 4.779012196594615e-05, "loss": 0.353, "step": 366, "task_loss": 0.17776460945606232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4258083701133728, "epoch": 0.31, "learning_rate": 4.7784084047820314e-05, "loss": 0.4292, "step": 367, "task_loss": 0.7558051347732544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36908140778541565, "epoch": 0.31, "learning_rate": 4.777804612969448e-05, "loss": 0.3344, "step": 368, "task_loss": 0.6298174858093262 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24819843471050262, "epoch": 0.31, "learning_rate": 4.7772008211568655e-05, "loss": 0.3379, "step": 369, "task_loss": 0.4020809531211853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7088571786880493, "epoch": 0.31, "learning_rate": 4.776597029344282e-05, "loss": 0.4665, "step": 370, "task_loss": 0.6894850134849548 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24134713411331177, "epoch": 0.31, "learning_rate": 4.775993237531699e-05, "loss": 0.3403, "step": 371, "task_loss": 0.03153505176305771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14028973877429962, "epoch": 0.31, "learning_rate": 4.7753894457191163e-05, "loss": 0.4063, "step": 372, "task_loss": 0.5380560755729675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2787623405456543, "epoch": 0.32, "learning_rate": 4.774785653906533e-05, "loss": 0.3469, "step": 373, "task_loss": 0.3664689064025879 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1806008219718933, "epoch": 0.32, "learning_rate": 4.7741818620939505e-05, "loss": 0.3726, "step": 374, "task_loss": 0.2669003903865814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4385870695114136, "epoch": 0.32, "learning_rate": 4.773578070281367e-05, "loss": 0.3562, "step": 375, "task_loss": 0.4492567479610443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4735843241214752, "epoch": 0.32, "learning_rate": 4.772974278468784e-05, "loss": 0.4357, "step": 376, "task_loss": 0.32930344343185425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20514580607414246, "epoch": 0.32, "learning_rate": 4.772370486656201e-05, "loss": 0.3232, "step": 377, "task_loss": 0.2108025997877121 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34991320967674255, "epoch": 0.32, "learning_rate": 4.771766694843618e-05, "loss": 0.3108, "step": 378, "task_loss": 0.3094622790813446 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5897074937820435, "epoch": 0.32, "learning_rate": 4.7711629030310354e-05, "loss": 0.3872, "step": 379, "task_loss": 0.14162714779376984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19355034828186035, "epoch": 0.32, "learning_rate": 4.770559111218452e-05, "loss": 0.2795, "step": 380, "task_loss": 0.36807486414909363 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6750555634498596, "epoch": 0.32, "learning_rate": 4.769955319405869e-05, "loss": 0.3886, "step": 381, "task_loss": 0.8189461827278137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20718452334403992, "epoch": 0.32, "learning_rate": 4.769351527593286e-05, "loss": 0.3071, "step": 382, "task_loss": 0.5275229811668396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3943529427051544, "epoch": 0.32, "learning_rate": 4.768747735780703e-05, "loss": 0.2765, "step": 383, "task_loss": 1.057845115661621 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2323814034461975, "epoch": 0.32, "learning_rate": 4.7681439439681204e-05, "loss": 0.3218, "step": 384, "task_loss": 0.31893423199653625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3772730827331543, "epoch": 0.33, "learning_rate": 4.767540152155537e-05, "loss": 0.4506, "step": 385, "task_loss": 0.7788183689117432 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23674823343753815, "epoch": 0.33, "learning_rate": 4.766936360342954e-05, "loss": 0.3268, "step": 386, "task_loss": 0.23776189982891083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39172255992889404, "epoch": 0.33, "learning_rate": 4.766332568530371e-05, "loss": 0.4911, "step": 387, "task_loss": 0.48238494992256165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5850317478179932, "epoch": 0.33, "learning_rate": 4.765728776717788e-05, "loss": 0.5242, "step": 388, "task_loss": 0.8710096478462219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38239288330078125, "epoch": 0.33, "learning_rate": 4.7651249849052046e-05, "loss": 0.3992, "step": 389, "task_loss": 0.8591318130493164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26710498332977295, "epoch": 0.33, "learning_rate": 4.764521193092622e-05, "loss": 0.3401, "step": 390, "task_loss": 0.7722374796867371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38567298650741577, "epoch": 0.33, "learning_rate": 4.763917401280039e-05, "loss": 0.2331, "step": 391, "task_loss": 0.7410212159156799 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24967175722122192, "epoch": 0.33, "learning_rate": 4.7633136094674555e-05, "loss": 0.4718, "step": 392, "task_loss": 0.27016666531562805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29140520095825195, "epoch": 0.33, "learning_rate": 4.762709817654873e-05, "loss": 0.3552, "step": 393, "task_loss": 1.4024794101715088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2952750027179718, "epoch": 0.33, "learning_rate": 4.76210602584229e-05, "loss": 0.3231, "step": 394, "task_loss": 0.5316455364227295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16485609114170074, "epoch": 0.33, "learning_rate": 4.761502234029707e-05, "loss": 0.2561, "step": 395, "task_loss": 0.5078779458999634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3249528110027313, "epoch": 0.33, "learning_rate": 4.760898442217124e-05, "loss": 0.2898, "step": 396, "task_loss": 0.2882906496524811 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3757632374763489, "epoch": 0.34, "learning_rate": 4.760294650404541e-05, "loss": 0.3383, "step": 397, "task_loss": 0.3791997730731964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3346756100654602, "epoch": 0.34, "learning_rate": 4.759690858591958e-05, "loss": 0.3454, "step": 398, "task_loss": 0.5002127885818481 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29547449946403503, "epoch": 0.34, "learning_rate": 4.7590870667793745e-05, "loss": 0.3299, "step": 399, "task_loss": 0.5307417511940002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2334940880537033, "epoch": 0.34, "learning_rate": 4.758483274966792e-05, "loss": 0.242, "step": 400, "task_loss": 0.9370666146278381 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43376976251602173, "epoch": 0.34, "learning_rate": 4.7578794831542086e-05, "loss": 0.3939, "step": 401, "task_loss": 0.8960332870483398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42513972520828247, "epoch": 0.34, "learning_rate": 4.7572756913416254e-05, "loss": 0.2688, "step": 402, "task_loss": 0.8588773012161255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4598153829574585, "epoch": 0.34, "learning_rate": 4.756671899529043e-05, "loss": 0.3251, "step": 403, "task_loss": 0.07185312360525131 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39404115080833435, "epoch": 0.34, "learning_rate": 4.75606810771646e-05, "loss": 0.3877, "step": 404, "task_loss": 0.9305233955383301 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38386476039886475, "epoch": 0.34, "learning_rate": 4.755464315903876e-05, "loss": 0.3919, "step": 405, "task_loss": 0.5179880857467651 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18021468818187714, "epoch": 0.34, "learning_rate": 4.7548605240912936e-05, "loss": 0.3241, "step": 406, "task_loss": 0.48423126339912415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2952260375022888, "epoch": 0.34, "learning_rate": 4.754256732278711e-05, "loss": 0.3803, "step": 407, "task_loss": 0.49306514859199524 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29078054428100586, "epoch": 0.34, "learning_rate": 4.753652940466127e-05, "loss": 0.3363, "step": 408, "task_loss": 0.6858339905738831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4606356620788574, "epoch": 0.35, "learning_rate": 4.7530491486535444e-05, "loss": 0.4153, "step": 409, "task_loss": 0.28931108117103577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3300876021385193, "epoch": 0.35, "learning_rate": 4.752445356840962e-05, "loss": 0.3077, "step": 410, "task_loss": 0.20503780245780945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4121675491333008, "epoch": 0.35, "learning_rate": 4.751841565028378e-05, "loss": 0.3139, "step": 411, "task_loss": 0.7052090764045715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.11272185295820236, "epoch": 0.35, "learning_rate": 4.751237773215795e-05, "loss": 0.3831, "step": 412, "task_loss": 0.0059148287400603294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2776438593864441, "epoch": 0.35, "learning_rate": 4.7506339814032126e-05, "loss": 0.325, "step": 413, "task_loss": 0.770449161529541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1635061800479889, "epoch": 0.35, "learning_rate": 4.7500301895906294e-05, "loss": 0.4445, "step": 414, "task_loss": 1.110143780708313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6209632158279419, "epoch": 0.35, "learning_rate": 4.749426397778046e-05, "loss": 0.4698, "step": 415, "task_loss": 0.4084756374359131 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3674491345882416, "epoch": 0.35, "learning_rate": 4.7488226059654635e-05, "loss": 0.3599, "step": 416, "task_loss": 0.008358551189303398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3936578035354614, "epoch": 0.35, "learning_rate": 4.74821881415288e-05, "loss": 0.437, "step": 417, "task_loss": 0.20710767805576324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35538336634635925, "epoch": 0.35, "learning_rate": 4.747615022340297e-05, "loss": 0.2891, "step": 418, "task_loss": 0.5034711360931396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34694403409957886, "epoch": 0.35, "learning_rate": 4.747011230527714e-05, "loss": 0.313, "step": 419, "task_loss": 0.7034034729003906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2399771511554718, "epoch": 0.35, "learning_rate": 4.746407438715132e-05, "loss": 0.3878, "step": 420, "task_loss": 0.7609342932701111 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42129385471343994, "epoch": 0.36, "learning_rate": 4.745803646902548e-05, "loss": 0.3658, "step": 421, "task_loss": 0.534970760345459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19749030470848083, "epoch": 0.36, "learning_rate": 4.745199855089965e-05, "loss": 0.335, "step": 422, "task_loss": 0.07227137684822083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25277066230773926, "epoch": 0.36, "learning_rate": 4.7445960632773825e-05, "loss": 0.354, "step": 423, "task_loss": 0.5646321773529053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2725897431373596, "epoch": 0.36, "learning_rate": 4.743992271464799e-05, "loss": 0.3444, "step": 424, "task_loss": 0.8753369450569153 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20973093807697296, "epoch": 0.36, "learning_rate": 4.743388479652216e-05, "loss": 0.3023, "step": 425, "task_loss": 0.1351103037595749 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38226479291915894, "epoch": 0.36, "learning_rate": 4.7427846878396334e-05, "loss": 0.456, "step": 426, "task_loss": 1.4345195293426514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4729475975036621, "epoch": 0.36, "learning_rate": 4.74218089602705e-05, "loss": 0.3641, "step": 427, "task_loss": 2.2501349449157715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45532089471817017, "epoch": 0.36, "learning_rate": 4.741577104214467e-05, "loss": 0.4858, "step": 428, "task_loss": 1.2799084186553955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22925134003162384, "epoch": 0.36, "learning_rate": 4.740973312401884e-05, "loss": 0.4835, "step": 429, "task_loss": 0.8517360687255859 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2979576587677002, "epoch": 0.36, "learning_rate": 4.740369520589301e-05, "loss": 0.4111, "step": 430, "task_loss": 1.4663408994674683 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3938520550727844, "epoch": 0.36, "learning_rate": 4.7397657287767176e-05, "loss": 0.4239, "step": 431, "task_loss": 1.036360502243042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17265523970127106, "epoch": 0.36, "learning_rate": 4.739161936964135e-05, "loss": 0.2999, "step": 432, "task_loss": 0.6008455753326416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3358423411846161, "epoch": 0.37, "learning_rate": 4.738558145151552e-05, "loss": 0.3109, "step": 433, "task_loss": 0.37620896100997925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32300394773483276, "epoch": 0.37, "learning_rate": 4.737954353338969e-05, "loss": 0.3246, "step": 434, "task_loss": 0.9428640604019165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16895656287670135, "epoch": 0.37, "learning_rate": 4.737350561526386e-05, "loss": 0.2193, "step": 435, "task_loss": 0.18554706871509552 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3725835978984833, "epoch": 0.37, "learning_rate": 4.736746769713803e-05, "loss": 0.3814, "step": 436, "task_loss": 0.5290009379386902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18242278695106506, "epoch": 0.37, "learning_rate": 4.73614297790122e-05, "loss": 0.2961, "step": 437, "task_loss": 1.3827800750732422 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5046520233154297, "epoch": 0.37, "learning_rate": 4.735539186088637e-05, "loss": 0.4822, "step": 438, "task_loss": 1.2833842039108276 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3221445381641388, "epoch": 0.37, "learning_rate": 4.734935394276054e-05, "loss": 0.292, "step": 439, "task_loss": 0.7684510946273804 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.233101025223732, "epoch": 0.37, "learning_rate": 4.734331602463471e-05, "loss": 0.3785, "step": 440, "task_loss": 0.5354031324386597 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20971271395683289, "epoch": 0.37, "learning_rate": 4.7337278106508875e-05, "loss": 0.3069, "step": 441, "task_loss": 0.0696810707449913 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27547770738601685, "epoch": 0.37, "learning_rate": 4.733124018838305e-05, "loss": 0.261, "step": 442, "task_loss": 1.111402153968811 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2920394837856293, "epoch": 0.37, "learning_rate": 4.7325202270257216e-05, "loss": 0.4509, "step": 443, "task_loss": 0.44566458463668823 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36836498975753784, "epoch": 0.38, "learning_rate": 4.731916435213139e-05, "loss": 0.4035, "step": 444, "task_loss": 0.46983402967453003 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27676546573638916, "epoch": 0.38, "learning_rate": 4.731312643400556e-05, "loss": 0.3703, "step": 445, "task_loss": 0.7192262411117554 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16957899928092957, "epoch": 0.38, "learning_rate": 4.7307088515879725e-05, "loss": 0.2624, "step": 446, "task_loss": 0.319479376077652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25872209668159485, "epoch": 0.38, "learning_rate": 4.73010505977539e-05, "loss": 0.3256, "step": 447, "task_loss": 0.8407450914382935 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2410227507352829, "epoch": 0.38, "learning_rate": 4.7295012679628066e-05, "loss": 0.5662, "step": 448, "task_loss": 0.7649925947189331 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3731471300125122, "epoch": 0.38, "learning_rate": 4.728897476150223e-05, "loss": 0.3712, "step": 449, "task_loss": 1.7003575563430786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22050905227661133, "epoch": 0.38, "learning_rate": 4.728293684337641e-05, "loss": 0.3312, "step": 450, "task_loss": 0.3010488450527191 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2045070081949234, "epoch": 0.38, "learning_rate": 4.7276898925250574e-05, "loss": 0.2589, "step": 451, "task_loss": 0.2576950490474701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16870079934597015, "epoch": 0.38, "learning_rate": 4.727086100712475e-05, "loss": 0.2176, "step": 452, "task_loss": 0.1162392720580101 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33130523562431335, "epoch": 0.38, "learning_rate": 4.7264823088998915e-05, "loss": 0.4402, "step": 453, "task_loss": 0.28806960582733154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2734120190143585, "epoch": 0.38, "learning_rate": 4.725878517087309e-05, "loss": 0.3589, "step": 454, "task_loss": 0.47519171237945557 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.13673269748687744, "epoch": 0.38, "learning_rate": 4.7252747252747257e-05, "loss": 0.2188, "step": 455, "task_loss": 0.9201744198799133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27608251571655273, "epoch": 0.39, "learning_rate": 4.7246709334621424e-05, "loss": 0.3168, "step": 456, "task_loss": 0.08681680262088776 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22551238536834717, "epoch": 0.39, "learning_rate": 4.72406714164956e-05, "loss": 0.2206, "step": 457, "task_loss": 0.3846595585346222 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18794289231300354, "epoch": 0.39, "learning_rate": 4.7234633498369765e-05, "loss": 0.4014, "step": 458, "task_loss": 1.023073673248291 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2077312469482422, "epoch": 0.39, "learning_rate": 4.722859558024393e-05, "loss": 0.3055, "step": 459, "task_loss": 0.9825731515884399 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3220754861831665, "epoch": 0.39, "learning_rate": 4.7222557662118106e-05, "loss": 0.3317, "step": 460, "task_loss": 0.8694445490837097 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22628703713417053, "epoch": 0.39, "learning_rate": 4.721651974399227e-05, "loss": 0.3871, "step": 461, "task_loss": 1.0766775608062744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26784414052963257, "epoch": 0.39, "learning_rate": 4.721048182586644e-05, "loss": 0.4032, "step": 462, "task_loss": 0.22623789310455322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29119762778282166, "epoch": 0.39, "learning_rate": 4.7204443907740614e-05, "loss": 0.3598, "step": 463, "task_loss": 0.26820826530456543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6346370577812195, "epoch": 0.39, "learning_rate": 4.719840598961479e-05, "loss": 0.3285, "step": 464, "task_loss": 0.6106746792793274 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2859550714492798, "epoch": 0.39, "learning_rate": 4.719236807148895e-05, "loss": 0.3351, "step": 465, "task_loss": 0.5985117554664612 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3339540362358093, "epoch": 0.39, "learning_rate": 4.718633015336312e-05, "loss": 0.2655, "step": 466, "task_loss": 0.31310099363327026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2721252143383026, "epoch": 0.39, "learning_rate": 4.7180292235237297e-05, "loss": 0.3153, "step": 467, "task_loss": 0.32043519616127014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31591564416885376, "epoch": 0.4, "learning_rate": 4.7174254317111464e-05, "loss": 0.3666, "step": 468, "task_loss": 0.6312203407287598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14705270528793335, "epoch": 0.4, "learning_rate": 4.716821639898563e-05, "loss": 0.3255, "step": 469, "task_loss": 0.060620732605457306 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.604712188243866, "epoch": 0.4, "learning_rate": 4.7162178480859805e-05, "loss": 0.3902, "step": 470, "task_loss": 1.1861919164657593 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5095392465591431, "epoch": 0.4, "learning_rate": 4.715614056273397e-05, "loss": 0.4131, "step": 471, "task_loss": 1.126703143119812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2504306137561798, "epoch": 0.4, "learning_rate": 4.715010264460814e-05, "loss": 0.2887, "step": 472, "task_loss": 0.699195146560669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5808939933776855, "epoch": 0.4, "learning_rate": 4.714406472648231e-05, "loss": 0.374, "step": 473, "task_loss": 0.6701858043670654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3845038414001465, "epoch": 0.4, "learning_rate": 4.713802680835648e-05, "loss": 0.2648, "step": 474, "task_loss": 0.31238487362861633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40197765827178955, "epoch": 0.4, "learning_rate": 4.713198889023065e-05, "loss": 0.3163, "step": 475, "task_loss": 0.5990235805511475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16502070426940918, "epoch": 0.4, "learning_rate": 4.712595097210482e-05, "loss": 0.3292, "step": 476, "task_loss": 0.6950055956840515 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30431246757507324, "epoch": 0.4, "learning_rate": 4.7119913053978996e-05, "loss": 0.3626, "step": 477, "task_loss": 0.12289254367351532 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5517590641975403, "epoch": 0.4, "learning_rate": 4.7113875135853156e-05, "loss": 0.4158, "step": 478, "task_loss": 1.1829577684402466 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.339616596698761, "epoch": 0.4, "learning_rate": 4.710783721772733e-05, "loss": 0.3549, "step": 479, "task_loss": 1.0606290102005005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31139183044433594, "epoch": 0.41, "learning_rate": 4.7101799299601504e-05, "loss": 0.3531, "step": 480, "task_loss": 0.9595376253128052 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4760275185108185, "epoch": 0.41, "learning_rate": 4.7095761381475664e-05, "loss": 0.3654, "step": 481, "task_loss": 0.9820896983146667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22577998042106628, "epoch": 0.41, "learning_rate": 4.708972346334984e-05, "loss": 0.2849, "step": 482, "task_loss": 0.3902606666088104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2948797941207886, "epoch": 0.41, "learning_rate": 4.708368554522401e-05, "loss": 0.2689, "step": 483, "task_loss": 0.7536925673484802 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2625094950199127, "epoch": 0.41, "learning_rate": 4.707764762709818e-05, "loss": 0.3218, "step": 484, "task_loss": 0.5010858178138733 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37831148505210876, "epoch": 0.41, "learning_rate": 4.7071609708972347e-05, "loss": 0.4067, "step": 485, "task_loss": 1.3217554092407227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2693570852279663, "epoch": 0.41, "learning_rate": 4.706557179084652e-05, "loss": 0.3778, "step": 486, "task_loss": 0.89955735206604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2764507532119751, "epoch": 0.41, "learning_rate": 4.705953387272069e-05, "loss": 0.2848, "step": 487, "task_loss": 0.765638530254364 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3032151758670807, "epoch": 0.41, "learning_rate": 4.7053495954594855e-05, "loss": 0.2813, "step": 488, "task_loss": 0.28684601187705994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28353601694107056, "epoch": 0.41, "learning_rate": 4.704745803646903e-05, "loss": 0.3289, "step": 489, "task_loss": 0.387953519821167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32782772183418274, "epoch": 0.41, "learning_rate": 4.7041420118343196e-05, "loss": 0.3847, "step": 490, "task_loss": 0.28010067343711853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21585595607757568, "epoch": 0.41, "learning_rate": 4.703538220021736e-05, "loss": 0.325, "step": 491, "task_loss": 0.47503677010536194 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5849031805992126, "epoch": 0.42, "learning_rate": 4.702934428209154e-05, "loss": 0.5094, "step": 492, "task_loss": 0.42090895771980286 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36460816860198975, "epoch": 0.42, "learning_rate": 4.702330636396571e-05, "loss": 0.367, "step": 493, "task_loss": 0.6196091175079346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18175528943538666, "epoch": 0.42, "learning_rate": 4.701726844583988e-05, "loss": 0.3347, "step": 494, "task_loss": 0.057926326990127563 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2345447540283203, "epoch": 0.42, "learning_rate": 4.7011230527714045e-05, "loss": 0.4054, "step": 495, "task_loss": 0.3000459671020508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32246047258377075, "epoch": 0.42, "learning_rate": 4.700519260958822e-05, "loss": 0.3606, "step": 496, "task_loss": 1.0355428457260132 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5496488213539124, "epoch": 0.42, "learning_rate": 4.6999154691462387e-05, "loss": 0.5024, "step": 497, "task_loss": 0.626095712184906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3241245448589325, "epoch": 0.42, "learning_rate": 4.6993116773336554e-05, "loss": 0.3513, "step": 498, "task_loss": 0.3973882496356964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20134803652763367, "epoch": 0.42, "learning_rate": 4.698707885521073e-05, "loss": 0.3078, "step": 499, "task_loss": 0.6092415452003479 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2920314073562622, "epoch": 0.42, "learning_rate": 4.6981040937084895e-05, "loss": 0.2342, "step": 500, "task_loss": 0.03425178676843643 }, { "epoch": 0.42, "eval_accuracy": 0.909940594059406, "eval_loss": 0.19931641221046448, "eval_runtime": 331.9728, "eval_samples_per_second": 76.06, "eval_steps_per_second": 0.596, "step": 500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3214389383792877, "epoch": 0.42, "learning_rate": 4.697500301895906e-05, "loss": 0.426, "step": 501, "task_loss": 1.1752419471740723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24752089381217957, "epoch": 0.42, "learning_rate": 4.6968965100833236e-05, "loss": 0.3073, "step": 502, "task_loss": 0.22783724963665009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35262978076934814, "epoch": 0.42, "learning_rate": 4.69629271827074e-05, "loss": 0.3475, "step": 503, "task_loss": 0.5075512528419495 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25392016768455505, "epoch": 0.43, "learning_rate": 4.695688926458158e-05, "loss": 0.3437, "step": 504, "task_loss": 0.8161268830299377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2642647922039032, "epoch": 0.43, "learning_rate": 4.6950851346455744e-05, "loss": 0.2824, "step": 505, "task_loss": 0.510317862033844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21475283801555634, "epoch": 0.43, "learning_rate": 4.694481342832991e-05, "loss": 0.2836, "step": 506, "task_loss": 0.502383291721344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3356464207172394, "epoch": 0.43, "learning_rate": 4.6938775510204086e-05, "loss": 0.2767, "step": 507, "task_loss": 0.32697027921676636 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.13120843470096588, "epoch": 0.43, "learning_rate": 4.693273759207825e-05, "loss": 0.2454, "step": 508, "task_loss": 0.24196498095989227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21002618968486786, "epoch": 0.43, "learning_rate": 4.692669967395243e-05, "loss": 0.2676, "step": 509, "task_loss": 0.6450607776641846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5171314477920532, "epoch": 0.43, "learning_rate": 4.6920661755826594e-05, "loss": 0.3279, "step": 510, "task_loss": 0.5804485082626343 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23054203391075134, "epoch": 0.43, "learning_rate": 4.691462383770076e-05, "loss": 0.3596, "step": 511, "task_loss": 0.6389950513839722 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23445239663124084, "epoch": 0.43, "learning_rate": 4.6908585919574935e-05, "loss": 0.4259, "step": 512, "task_loss": 0.6804769039154053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6749568581581116, "epoch": 0.43, "learning_rate": 4.69025480014491e-05, "loss": 0.3647, "step": 513, "task_loss": 0.46374768018722534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28037142753601074, "epoch": 0.43, "learning_rate": 4.6896510083323276e-05, "loss": 0.3379, "step": 514, "task_loss": 1.3123433589935303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28470367193222046, "epoch": 0.44, "learning_rate": 4.689047216519744e-05, "loss": 0.3898, "step": 515, "task_loss": 0.8805803060531616 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48044174909591675, "epoch": 0.44, "learning_rate": 4.688443424707161e-05, "loss": 0.3724, "step": 516, "task_loss": 0.4094547629356384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5202345252037048, "epoch": 0.44, "learning_rate": 4.6878396328945784e-05, "loss": 0.3702, "step": 517, "task_loss": 1.0218853950500488 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31284359097480774, "epoch": 0.44, "learning_rate": 4.687235841081995e-05, "loss": 0.2752, "step": 518, "task_loss": 0.37680670619010925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4007658064365387, "epoch": 0.44, "learning_rate": 4.686632049269412e-05, "loss": 0.3542, "step": 519, "task_loss": 1.166516661643982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3821266293525696, "epoch": 0.44, "learning_rate": 4.686028257456829e-05, "loss": 0.288, "step": 520, "task_loss": 0.20097209513187408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3633727431297302, "epoch": 0.44, "learning_rate": 4.685424465644246e-05, "loss": 0.4106, "step": 521, "task_loss": 0.518059253692627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48358604311943054, "epoch": 0.44, "learning_rate": 4.684820673831663e-05, "loss": 0.4079, "step": 522, "task_loss": 0.40281111001968384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2619108557701111, "epoch": 0.44, "learning_rate": 4.68421688201908e-05, "loss": 0.298, "step": 523, "task_loss": 0.591888964176178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5433233380317688, "epoch": 0.44, "learning_rate": 4.6836130902064975e-05, "loss": 0.3943, "step": 524, "task_loss": 0.6602164506912231 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27574366331100464, "epoch": 0.44, "learning_rate": 4.683009298393914e-05, "loss": 0.3367, "step": 525, "task_loss": 0.40031930804252625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23611882328987122, "epoch": 0.44, "learning_rate": 4.682405506581331e-05, "loss": 0.3268, "step": 526, "task_loss": 0.6735096573829651 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3347584009170532, "epoch": 0.45, "learning_rate": 4.6818017147687483e-05, "loss": 0.3314, "step": 527, "task_loss": 1.0133256912231445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4046631455421448, "epoch": 0.45, "learning_rate": 4.681197922956165e-05, "loss": 0.3675, "step": 528, "task_loss": 0.22774213552474976 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14145728945732117, "epoch": 0.45, "learning_rate": 4.680594131143582e-05, "loss": 0.2624, "step": 529, "task_loss": 0.14753012359142303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1398371458053589, "epoch": 0.45, "learning_rate": 4.679990339330999e-05, "loss": 0.2788, "step": 530, "task_loss": 0.3750229775905609 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32043927907943726, "epoch": 0.45, "learning_rate": 4.679386547518416e-05, "loss": 0.2973, "step": 531, "task_loss": 0.2027919590473175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39350372552871704, "epoch": 0.45, "learning_rate": 4.6787827557058326e-05, "loss": 0.4484, "step": 532, "task_loss": 0.8419382572174072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42982804775238037, "epoch": 0.45, "learning_rate": 4.67817896389325e-05, "loss": 0.4461, "step": 533, "task_loss": 0.3650851249694824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37214958667755127, "epoch": 0.45, "learning_rate": 4.677575172080667e-05, "loss": 0.3964, "step": 534, "task_loss": 1.0011132955551147 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26830512285232544, "epoch": 0.45, "learning_rate": 4.6769713802680834e-05, "loss": 0.2929, "step": 535, "task_loss": 0.06840169429779053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24159786105155945, "epoch": 0.45, "learning_rate": 4.676367588455501e-05, "loss": 0.3243, "step": 536, "task_loss": 0.4852682650089264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4502754211425781, "epoch": 0.45, "learning_rate": 4.675763796642918e-05, "loss": 0.4559, "step": 537, "task_loss": 0.9336734414100647 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21779321134090424, "epoch": 0.45, "learning_rate": 4.675160004830334e-05, "loss": 0.3455, "step": 538, "task_loss": 0.8196550607681274 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28122198581695557, "epoch": 0.46, "learning_rate": 4.674556213017752e-05, "loss": 0.2614, "step": 539, "task_loss": 0.5415457487106323 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25979700684547424, "epoch": 0.46, "learning_rate": 4.673952421205169e-05, "loss": 0.42, "step": 540, "task_loss": 0.40739837288856506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2552754878997803, "epoch": 0.46, "learning_rate": 4.673348629392585e-05, "loss": 0.3313, "step": 541, "task_loss": 0.5863358378410339 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2257552146911621, "epoch": 0.46, "learning_rate": 4.6727448375800025e-05, "loss": 0.2739, "step": 542, "task_loss": 0.7218215465545654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30799582600593567, "epoch": 0.46, "learning_rate": 4.67214104576742e-05, "loss": 0.2731, "step": 543, "task_loss": 0.9804763197898865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45081108808517456, "epoch": 0.46, "learning_rate": 4.6715372539548366e-05, "loss": 0.3298, "step": 544, "task_loss": 0.4849703013896942 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5152310132980347, "epoch": 0.46, "learning_rate": 4.670933462142253e-05, "loss": 0.4056, "step": 545, "task_loss": 1.0091228485107422 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15075373649597168, "epoch": 0.46, "learning_rate": 4.670329670329671e-05, "loss": 0.2717, "step": 546, "task_loss": 0.22786150872707367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3042258620262146, "epoch": 0.46, "learning_rate": 4.6697258785170875e-05, "loss": 0.3017, "step": 547, "task_loss": 0.26641228795051575 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2634465992450714, "epoch": 0.46, "learning_rate": 4.669122086704504e-05, "loss": 0.2981, "step": 548, "task_loss": 0.486630380153656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3336077630519867, "epoch": 0.46, "learning_rate": 4.6685182948919216e-05, "loss": 0.4622, "step": 549, "task_loss": 1.4173078536987305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24328269064426422, "epoch": 0.46, "learning_rate": 4.667914503079339e-05, "loss": 0.3263, "step": 550, "task_loss": 0.8122907876968384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15282608568668365, "epoch": 0.47, "learning_rate": 4.667310711266755e-05, "loss": 0.2747, "step": 551, "task_loss": 0.873925507068634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20954471826553345, "epoch": 0.47, "learning_rate": 4.6667069194541724e-05, "loss": 0.3625, "step": 552, "task_loss": 1.0411988496780396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16053080558776855, "epoch": 0.47, "learning_rate": 4.66610312764159e-05, "loss": 0.37, "step": 553, "task_loss": 1.2061426639556885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2893539369106293, "epoch": 0.47, "learning_rate": 4.665499335829006e-05, "loss": 0.3157, "step": 554, "task_loss": 0.19377155601978302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22081708908081055, "epoch": 0.47, "learning_rate": 4.664895544016423e-05, "loss": 0.3484, "step": 555, "task_loss": 0.4346272647380829 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3479026257991791, "epoch": 0.47, "learning_rate": 4.6642917522038406e-05, "loss": 0.3105, "step": 556, "task_loss": 0.42657533288002014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33032193779945374, "epoch": 0.47, "learning_rate": 4.6636879603912573e-05, "loss": 0.4444, "step": 557, "task_loss": 1.436732292175293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26687997579574585, "epoch": 0.47, "learning_rate": 4.663084168578674e-05, "loss": 0.284, "step": 558, "task_loss": 0.2994716167449951 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5433803200721741, "epoch": 0.47, "learning_rate": 4.6624803767660915e-05, "loss": 0.3724, "step": 559, "task_loss": 0.539162814617157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3357256054878235, "epoch": 0.47, "learning_rate": 4.661876584953508e-05, "loss": 0.3312, "step": 560, "task_loss": 0.7900286912918091 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30817902088165283, "epoch": 0.47, "learning_rate": 4.661272793140925e-05, "loss": 0.3421, "step": 561, "task_loss": 0.6682403087615967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38211166858673096, "epoch": 0.47, "learning_rate": 4.660669001328342e-05, "loss": 0.5231, "step": 562, "task_loss": 0.9012933373451233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2623595893383026, "epoch": 0.48, "learning_rate": 4.660065209515759e-05, "loss": 0.291, "step": 563, "task_loss": 0.7156077027320862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2378789633512497, "epoch": 0.48, "learning_rate": 4.659461417703176e-05, "loss": 0.2761, "step": 564, "task_loss": 1.1595385074615479 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41604775190353394, "epoch": 0.48, "learning_rate": 4.658857625890593e-05, "loss": 0.3418, "step": 565, "task_loss": 0.4851347804069519 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2410268485546112, "epoch": 0.48, "learning_rate": 4.6582538340780105e-05, "loss": 0.3203, "step": 566, "task_loss": 0.07295583188533783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2732866406440735, "epoch": 0.48, "learning_rate": 4.657650042265427e-05, "loss": 0.4193, "step": 567, "task_loss": 1.6112494468688965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1995835155248642, "epoch": 0.48, "learning_rate": 4.657046250452844e-05, "loss": 0.3585, "step": 568, "task_loss": 0.05776522308588028 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3214224576950073, "epoch": 0.48, "learning_rate": 4.6564424586402614e-05, "loss": 0.3614, "step": 569, "task_loss": 0.7416189908981323 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.541674017906189, "epoch": 0.48, "learning_rate": 4.655838666827678e-05, "loss": 0.3154, "step": 570, "task_loss": 0.4410979449748993 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26405900716781616, "epoch": 0.48, "learning_rate": 4.655234875015095e-05, "loss": 0.3339, "step": 571, "task_loss": 0.7340288758277893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4169883728027344, "epoch": 0.48, "learning_rate": 4.654631083202512e-05, "loss": 0.2863, "step": 572, "task_loss": 0.3259666860103607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2947605550289154, "epoch": 0.48, "learning_rate": 4.654027291389929e-05, "loss": 0.3098, "step": 573, "task_loss": 0.867961585521698 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1909080594778061, "epoch": 0.48, "learning_rate": 4.6534234995773456e-05, "loss": 0.2975, "step": 574, "task_loss": 0.7679494023323059 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22989346086978912, "epoch": 0.49, "learning_rate": 4.652819707764763e-05, "loss": 0.3853, "step": 575, "task_loss": 0.47458720207214355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2999410331249237, "epoch": 0.49, "learning_rate": 4.65221591595218e-05, "loss": 0.3398, "step": 576, "task_loss": 0.26157906651496887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2149704396724701, "epoch": 0.49, "learning_rate": 4.651612124139597e-05, "loss": 0.4127, "step": 577, "task_loss": 0.9646743535995483 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26012635231018066, "epoch": 0.49, "learning_rate": 4.651008332327014e-05, "loss": 0.3198, "step": 578, "task_loss": 0.6882126331329346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3122275471687317, "epoch": 0.49, "learning_rate": 4.6504045405144306e-05, "loss": 0.5031, "step": 579, "task_loss": 1.3654959201812744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36982664465904236, "epoch": 0.49, "learning_rate": 4.649800748701848e-05, "loss": 0.3765, "step": 580, "task_loss": 0.9517791271209717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22919541597366333, "epoch": 0.49, "learning_rate": 4.649196956889265e-05, "loss": 0.2715, "step": 581, "task_loss": 0.5387917757034302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4595698118209839, "epoch": 0.49, "learning_rate": 4.648593165076682e-05, "loss": 0.4974, "step": 582, "task_loss": 1.2211872339248657 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22751200199127197, "epoch": 0.49, "learning_rate": 4.647989373264099e-05, "loss": 0.3254, "step": 583, "task_loss": 0.2121063619852066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.12638330459594727, "epoch": 0.49, "learning_rate": 4.6473855814515155e-05, "loss": 0.3561, "step": 584, "task_loss": 1.011884331703186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5068357586860657, "epoch": 0.49, "learning_rate": 4.646781789638933e-05, "loss": 0.4509, "step": 585, "task_loss": 1.340288519859314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16345316171646118, "epoch": 0.5, "learning_rate": 4.6461779978263496e-05, "loss": 0.2564, "step": 586, "task_loss": 0.17975908517837524 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38372111320495605, "epoch": 0.5, "learning_rate": 4.645574206013767e-05, "loss": 0.3754, "step": 587, "task_loss": 0.5905306339263916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5385730862617493, "epoch": 0.5, "learning_rate": 4.644970414201184e-05, "loss": 0.4212, "step": 588, "task_loss": 0.8999453783035278 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23194439709186554, "epoch": 0.5, "learning_rate": 4.6443666223886005e-05, "loss": 0.3761, "step": 589, "task_loss": 0.3124553859233856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3309844136238098, "epoch": 0.5, "learning_rate": 4.643762830576018e-05, "loss": 0.3219, "step": 590, "task_loss": 1.0926543474197388 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1828235685825348, "epoch": 0.5, "learning_rate": 4.6431590387634346e-05, "loss": 0.3361, "step": 591, "task_loss": 0.15941619873046875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2561258375644684, "epoch": 0.5, "learning_rate": 4.642555246950851e-05, "loss": 0.3175, "step": 592, "task_loss": 0.32400915026664734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26085364818573, "epoch": 0.5, "learning_rate": 4.641951455138269e-05, "loss": 0.2827, "step": 593, "task_loss": 0.3379563093185425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14708326756954193, "epoch": 0.5, "learning_rate": 4.6413476633256854e-05, "loss": 0.2454, "step": 594, "task_loss": 0.29252734780311584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44123297929763794, "epoch": 0.5, "learning_rate": 4.640743871513102e-05, "loss": 0.4009, "step": 595, "task_loss": 0.3806767463684082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3449915945529938, "epoch": 0.5, "learning_rate": 4.6401400797005195e-05, "loss": 0.3228, "step": 596, "task_loss": 0.9835615754127502 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1938939392566681, "epoch": 0.5, "learning_rate": 4.639536287887937e-05, "loss": 0.3936, "step": 597, "task_loss": 0.23504403233528137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3289240598678589, "epoch": 0.51, "learning_rate": 4.638932496075353e-05, "loss": 0.467, "step": 598, "task_loss": 0.8298601508140564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18604901432991028, "epoch": 0.51, "learning_rate": 4.6383287042627704e-05, "loss": 0.3035, "step": 599, "task_loss": 0.34646496176719666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34616750478744507, "epoch": 0.51, "learning_rate": 4.637724912450188e-05, "loss": 0.4035, "step": 600, "task_loss": 1.0226186513900757 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16837868094444275, "epoch": 0.51, "learning_rate": 4.6371211206376045e-05, "loss": 0.4226, "step": 601, "task_loss": 1.460707187652588 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.272234708070755, "epoch": 0.51, "learning_rate": 4.636517328825021e-05, "loss": 0.3578, "step": 602, "task_loss": 0.5632961392402649 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4402560591697693, "epoch": 0.51, "learning_rate": 4.6359135370124386e-05, "loss": 0.2791, "step": 603, "task_loss": 0.39590609073638916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4244212210178375, "epoch": 0.51, "learning_rate": 4.635309745199855e-05, "loss": 0.2687, "step": 604, "task_loss": 0.8300043940544128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25357484817504883, "epoch": 0.51, "learning_rate": 4.634705953387272e-05, "loss": 0.2405, "step": 605, "task_loss": 0.30436965823173523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18639443814754486, "epoch": 0.51, "learning_rate": 4.6341021615746894e-05, "loss": 0.2873, "step": 606, "task_loss": 0.1057465448975563 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3504343628883362, "epoch": 0.51, "learning_rate": 4.633498369762107e-05, "loss": 0.3239, "step": 607, "task_loss": 0.3092884123325348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5062164068222046, "epoch": 0.51, "learning_rate": 4.632894577949523e-05, "loss": 0.396, "step": 608, "task_loss": 0.8111938834190369 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.262260377407074, "epoch": 0.51, "learning_rate": 4.63229078613694e-05, "loss": 0.3223, "step": 609, "task_loss": 0.5450887680053711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24830937385559082, "epoch": 0.52, "learning_rate": 4.6316869943243576e-05, "loss": 0.3151, "step": 610, "task_loss": 0.45044833421707153 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36170318722724915, "epoch": 0.52, "learning_rate": 4.631083202511774e-05, "loss": 0.3439, "step": 611, "task_loss": 0.9887471795082092 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19432632625102997, "epoch": 0.52, "learning_rate": 4.630479410699191e-05, "loss": 0.4452, "step": 612, "task_loss": 0.6152409911155701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40000784397125244, "epoch": 0.52, "learning_rate": 4.6298756188866085e-05, "loss": 0.4108, "step": 613, "task_loss": 0.8184918165206909 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35141733288764954, "epoch": 0.52, "learning_rate": 4.6292718270740245e-05, "loss": 0.3557, "step": 614, "task_loss": 1.2374716997146606 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30691444873809814, "epoch": 0.52, "learning_rate": 4.628668035261442e-05, "loss": 0.4459, "step": 615, "task_loss": 1.2013359069824219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.249248206615448, "epoch": 0.52, "learning_rate": 4.628064243448859e-05, "loss": 0.3045, "step": 616, "task_loss": 1.0391610860824585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26169002056121826, "epoch": 0.52, "learning_rate": 4.627460451636276e-05, "loss": 0.332, "step": 617, "task_loss": 1.112314224243164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3964969515800476, "epoch": 0.52, "learning_rate": 4.626856659823693e-05, "loss": 0.4359, "step": 618, "task_loss": 0.752595067024231 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42917415499687195, "epoch": 0.52, "learning_rate": 4.62625286801111e-05, "loss": 0.3633, "step": 619, "task_loss": 0.2796061336994171 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3168586194515228, "epoch": 0.52, "learning_rate": 4.625649076198527e-05, "loss": 0.419, "step": 620, "task_loss": 0.6878929138183594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2845689058303833, "epoch": 0.52, "learning_rate": 4.6250452843859436e-05, "loss": 0.3725, "step": 621, "task_loss": 1.1580651998519897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5346139669418335, "epoch": 0.53, "learning_rate": 4.624441492573361e-05, "loss": 0.3973, "step": 622, "task_loss": 1.4752721786499023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15969890356063843, "epoch": 0.53, "learning_rate": 4.6238377007607784e-05, "loss": 0.3206, "step": 623, "task_loss": 0.08208628743886948 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.319805383682251, "epoch": 0.53, "learning_rate": 4.6232339089481944e-05, "loss": 0.413, "step": 624, "task_loss": 0.4376118779182434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42242658138275146, "epoch": 0.53, "learning_rate": 4.622630117135612e-05, "loss": 0.4223, "step": 625, "task_loss": 0.5285881161689758 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21696613729000092, "epoch": 0.53, "learning_rate": 4.622026325323029e-05, "loss": 0.3114, "step": 626, "task_loss": 0.6784839630126953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4995373487472534, "epoch": 0.53, "learning_rate": 4.621422533510446e-05, "loss": 0.4397, "step": 627, "task_loss": 0.5694563388824463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.385191947221756, "epoch": 0.53, "learning_rate": 4.6208187416978626e-05, "loss": 0.3475, "step": 628, "task_loss": 0.7956068515777588 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33759069442749023, "epoch": 0.53, "learning_rate": 4.62021494988528e-05, "loss": 0.3289, "step": 629, "task_loss": 0.9872652888298035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3885927200317383, "epoch": 0.53, "learning_rate": 4.619611158072697e-05, "loss": 0.3227, "step": 630, "task_loss": 1.1469123363494873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30725589394569397, "epoch": 0.53, "learning_rate": 4.6190073662601135e-05, "loss": 0.2629, "step": 631, "task_loss": 0.49530598521232605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3132869005203247, "epoch": 0.53, "learning_rate": 4.618403574447531e-05, "loss": 0.3612, "step": 632, "task_loss": 0.9310511946678162 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4931173324584961, "epoch": 0.53, "learning_rate": 4.6177997826349476e-05, "loss": 0.3286, "step": 633, "task_loss": 0.2740153968334198 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29195505380630493, "epoch": 0.54, "learning_rate": 4.617195990822364e-05, "loss": 0.2928, "step": 634, "task_loss": 0.7763641476631165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18152590095996857, "epoch": 0.54, "learning_rate": 4.616592199009782e-05, "loss": 0.2984, "step": 635, "task_loss": 0.5429837107658386 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21703627705574036, "epoch": 0.54, "learning_rate": 4.6159884071971984e-05, "loss": 0.3682, "step": 636, "task_loss": 1.5745832920074463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3210102915763855, "epoch": 0.54, "learning_rate": 4.615384615384616e-05, "loss": 0.2953, "step": 637, "task_loss": 0.7568743228912354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30893927812576294, "epoch": 0.54, "learning_rate": 4.6147808235720325e-05, "loss": 0.3394, "step": 638, "task_loss": 1.140456199645996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3295965790748596, "epoch": 0.54, "learning_rate": 4.61417703175945e-05, "loss": 0.3166, "step": 639, "task_loss": 0.18963338434696198 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45958712697029114, "epoch": 0.54, "learning_rate": 4.6135732399468666e-05, "loss": 0.3025, "step": 640, "task_loss": 0.3089500367641449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17197713255882263, "epoch": 0.54, "learning_rate": 4.6129694481342834e-05, "loss": 0.3289, "step": 641, "task_loss": 0.16987687349319458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3031250834465027, "epoch": 0.54, "learning_rate": 4.612365656321701e-05, "loss": 0.3966, "step": 642, "task_loss": 0.9233768582344055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2365124225616455, "epoch": 0.54, "learning_rate": 4.6117618645091175e-05, "loss": 0.3898, "step": 643, "task_loss": 0.9150201082229614 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2222016453742981, "epoch": 0.54, "learning_rate": 4.611158072696534e-05, "loss": 0.3668, "step": 644, "task_loss": 0.3421773314476013 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1851375848054886, "epoch": 0.54, "learning_rate": 4.6105542808839516e-05, "loss": 0.3198, "step": 645, "task_loss": 0.9044616222381592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28746968507766724, "epoch": 0.55, "learning_rate": 4.609950489071368e-05, "loss": 0.3223, "step": 646, "task_loss": 0.18459787964820862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4223170280456543, "epoch": 0.55, "learning_rate": 4.609346697258786e-05, "loss": 0.3582, "step": 647, "task_loss": 0.29560795426368713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22690781950950623, "epoch": 0.55, "learning_rate": 4.6087429054462024e-05, "loss": 0.3841, "step": 648, "task_loss": 0.30116015672683716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2884330749511719, "epoch": 0.55, "learning_rate": 4.608139113633619e-05, "loss": 0.3389, "step": 649, "task_loss": 0.49120450019836426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23705758154392242, "epoch": 0.55, "learning_rate": 4.6075353218210365e-05, "loss": 0.3309, "step": 650, "task_loss": 0.7665039300918579 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2771817445755005, "epoch": 0.55, "learning_rate": 4.606931530008453e-05, "loss": 0.3282, "step": 651, "task_loss": 0.917181134223938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3121924102306366, "epoch": 0.55, "learning_rate": 4.60632773819587e-05, "loss": 0.2852, "step": 652, "task_loss": 0.48826202750205994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38371387124061584, "epoch": 0.55, "learning_rate": 4.6057239463832874e-05, "loss": 0.4694, "step": 653, "task_loss": 0.16272789239883423 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29378846287727356, "epoch": 0.55, "learning_rate": 4.605120154570704e-05, "loss": 0.2867, "step": 654, "task_loss": 0.6163687109947205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5379399061203003, "epoch": 0.55, "learning_rate": 4.6045163627581215e-05, "loss": 0.3337, "step": 655, "task_loss": 0.0715159997344017 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.222457617521286, "epoch": 0.55, "learning_rate": 4.603912570945538e-05, "loss": 0.4826, "step": 656, "task_loss": 0.465444952249527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21659287810325623, "epoch": 0.56, "learning_rate": 4.6033087791329556e-05, "loss": 0.3304, "step": 657, "task_loss": 0.6306247711181641 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21352076530456543, "epoch": 0.56, "learning_rate": 4.602704987320372e-05, "loss": 0.3254, "step": 658, "task_loss": 0.5197740793228149 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28062522411346436, "epoch": 0.56, "learning_rate": 4.602101195507789e-05, "loss": 0.4567, "step": 659, "task_loss": 0.7757982611656189 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21774084866046906, "epoch": 0.56, "learning_rate": 4.6014974036952064e-05, "loss": 0.3684, "step": 660, "task_loss": 0.3462081551551819 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5029231905937195, "epoch": 0.56, "learning_rate": 4.600893611882623e-05, "loss": 0.4701, "step": 661, "task_loss": 0.8127275705337524 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32489013671875, "epoch": 0.56, "learning_rate": 4.60028982007004e-05, "loss": 0.3802, "step": 662, "task_loss": 0.5152966976165771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41281700134277344, "epoch": 0.56, "learning_rate": 4.599686028257457e-05, "loss": 0.3943, "step": 663, "task_loss": 0.1648619920015335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16848497092723846, "epoch": 0.56, "learning_rate": 4.599082236444874e-05, "loss": 0.3543, "step": 664, "task_loss": 0.32142090797424316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32245299220085144, "epoch": 0.56, "learning_rate": 4.598478444632291e-05, "loss": 0.3191, "step": 665, "task_loss": 0.6986929178237915 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.377815842628479, "epoch": 0.56, "learning_rate": 4.597874652819708e-05, "loss": 0.3344, "step": 666, "task_loss": 0.3277522623538971 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22422122955322266, "epoch": 0.56, "learning_rate": 4.5972708610071255e-05, "loss": 0.3026, "step": 667, "task_loss": 0.39948341250419617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20516589283943176, "epoch": 0.56, "learning_rate": 4.5966670691945415e-05, "loss": 0.3932, "step": 668, "task_loss": 0.640921413898468 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3050314784049988, "epoch": 0.57, "learning_rate": 4.596063277381959e-05, "loss": 0.3807, "step": 669, "task_loss": 0.8235583305358887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32328012585639954, "epoch": 0.57, "learning_rate": 4.595459485569376e-05, "loss": 0.2872, "step": 670, "task_loss": 0.4036175012588501 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3610336482524872, "epoch": 0.57, "learning_rate": 4.5948556937567924e-05, "loss": 0.3858, "step": 671, "task_loss": 1.1951301097869873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4432356357574463, "epoch": 0.57, "learning_rate": 4.59425190194421e-05, "loss": 0.4474, "step": 672, "task_loss": 0.5900416970252991 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4261099100112915, "epoch": 0.57, "learning_rate": 4.593648110131627e-05, "loss": 0.3454, "step": 673, "task_loss": 1.055342435836792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20519396662712097, "epoch": 0.57, "learning_rate": 4.593044318319044e-05, "loss": 0.2919, "step": 674, "task_loss": 0.34435221552848816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3543371260166168, "epoch": 0.57, "learning_rate": 4.5924405265064606e-05, "loss": 0.5102, "step": 675, "task_loss": 1.022832989692688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2844223082065582, "epoch": 0.57, "learning_rate": 4.591836734693878e-05, "loss": 0.4493, "step": 676, "task_loss": 0.7816020250320435 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20793043076992035, "epoch": 0.57, "learning_rate": 4.591232942881295e-05, "loss": 0.3174, "step": 677, "task_loss": 0.3218630254268646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22465424239635468, "epoch": 0.57, "learning_rate": 4.5906291510687114e-05, "loss": 0.3336, "step": 678, "task_loss": 0.3047090470790863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40233683586120605, "epoch": 0.57, "learning_rate": 4.590025359256129e-05, "loss": 0.3431, "step": 679, "task_loss": 0.4575062096118927 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22025063633918762, "epoch": 0.57, "learning_rate": 4.589421567443546e-05, "loss": 0.3159, "step": 680, "task_loss": 0.794355571269989 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23560495674610138, "epoch": 0.58, "learning_rate": 4.588817775630962e-05, "loss": 0.3999, "step": 681, "task_loss": 0.12755835056304932 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14330744743347168, "epoch": 0.58, "learning_rate": 4.5882139838183797e-05, "loss": 0.3464, "step": 682, "task_loss": 0.36596688628196716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7814731597900391, "epoch": 0.58, "learning_rate": 4.587610192005797e-05, "loss": 0.513, "step": 683, "task_loss": 0.6782071590423584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2075170874595642, "epoch": 0.58, "learning_rate": 4.587006400193213e-05, "loss": 0.2638, "step": 684, "task_loss": 0.6861878633499146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3097824156284332, "epoch": 0.58, "learning_rate": 4.5864026083806305e-05, "loss": 0.368, "step": 685, "task_loss": 0.5533177852630615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3624565899372101, "epoch": 0.58, "learning_rate": 4.585798816568048e-05, "loss": 0.245, "step": 686, "task_loss": 0.16783487796783447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37630215287208557, "epoch": 0.58, "learning_rate": 4.5851950247554646e-05, "loss": 0.3482, "step": 687, "task_loss": 0.9571911692619324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17491865158081055, "epoch": 0.58, "learning_rate": 4.584591232942881e-05, "loss": 0.2734, "step": 688, "task_loss": 0.6345487236976624 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4102327823638916, "epoch": 0.58, "learning_rate": 4.583987441130299e-05, "loss": 0.3504, "step": 689, "task_loss": 0.4535101354122162 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19544202089309692, "epoch": 0.58, "learning_rate": 4.5833836493177154e-05, "loss": 0.3084, "step": 690, "task_loss": 0.4078769385814667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14398987591266632, "epoch": 0.58, "learning_rate": 4.582779857505132e-05, "loss": 0.3691, "step": 691, "task_loss": 0.3361753821372986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5344889163970947, "epoch": 0.58, "learning_rate": 4.5821760656925496e-05, "loss": 0.4422, "step": 692, "task_loss": 1.0143805742263794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.269297331571579, "epoch": 0.59, "learning_rate": 4.581572273879966e-05, "loss": 0.475, "step": 693, "task_loss": 1.301089882850647 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.596970796585083, "epoch": 0.59, "learning_rate": 4.580968482067383e-05, "loss": 0.4276, "step": 694, "task_loss": 0.7972944378852844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38327354192733765, "epoch": 0.59, "learning_rate": 4.5803646902548004e-05, "loss": 0.3179, "step": 695, "task_loss": 1.1533136367797852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30539757013320923, "epoch": 0.59, "learning_rate": 4.579760898442218e-05, "loss": 0.3578, "step": 696, "task_loss": 0.6085097193717957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49501484632492065, "epoch": 0.59, "learning_rate": 4.5791571066296345e-05, "loss": 0.367, "step": 697, "task_loss": 0.8902134299278259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4763624966144562, "epoch": 0.59, "learning_rate": 4.578553314817051e-05, "loss": 0.524, "step": 698, "task_loss": 0.3811951279640198 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2410493791103363, "epoch": 0.59, "learning_rate": 4.5779495230044686e-05, "loss": 0.3742, "step": 699, "task_loss": 0.5365801453590393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3064667880535126, "epoch": 0.59, "learning_rate": 4.577345731191885e-05, "loss": 0.4514, "step": 700, "task_loss": 0.4774753153324127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20392590761184692, "epoch": 0.59, "learning_rate": 4.576741939379302e-05, "loss": 0.388, "step": 701, "task_loss": 0.38719120621681213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2861010432243347, "epoch": 0.59, "learning_rate": 4.5761381475667194e-05, "loss": 0.333, "step": 702, "task_loss": 0.6700209975242615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3971763849258423, "epoch": 0.59, "learning_rate": 4.575534355754136e-05, "loss": 0.3521, "step": 703, "task_loss": 0.6572876572608948 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1972181797027588, "epoch": 0.59, "learning_rate": 4.574930563941553e-05, "loss": 0.3076, "step": 704, "task_loss": 1.445373296737671 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6471041440963745, "epoch": 0.6, "learning_rate": 4.57432677212897e-05, "loss": 0.3777, "step": 705, "task_loss": 1.7377456426620483 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26639580726623535, "epoch": 0.6, "learning_rate": 4.573722980316387e-05, "loss": 0.3094, "step": 706, "task_loss": 0.3332146406173706 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26442086696624756, "epoch": 0.6, "learning_rate": 4.573119188503804e-05, "loss": 0.3722, "step": 707, "task_loss": 0.5081799030303955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1975618302822113, "epoch": 0.6, "learning_rate": 4.572515396691221e-05, "loss": 0.2829, "step": 708, "task_loss": 0.5284441709518433 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2323353886604309, "epoch": 0.6, "learning_rate": 4.571911604878638e-05, "loss": 0.5159, "step": 709, "task_loss": 0.4129428267478943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19592002034187317, "epoch": 0.6, "learning_rate": 4.571307813066055e-05, "loss": 0.2867, "step": 710, "task_loss": 0.7605122327804565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24235068261623383, "epoch": 0.6, "learning_rate": 4.570704021253472e-05, "loss": 0.4112, "step": 711, "task_loss": 1.4378414154052734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3608337640762329, "epoch": 0.6, "learning_rate": 4.570100229440889e-05, "loss": 0.3894, "step": 712, "task_loss": 0.7369691729545593 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24902302026748657, "epoch": 0.6, "learning_rate": 4.569496437628306e-05, "loss": 0.2704, "step": 713, "task_loss": 0.14139476418495178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31815749406814575, "epoch": 0.6, "learning_rate": 4.568892645815723e-05, "loss": 0.3584, "step": 714, "task_loss": 0.11305968463420868 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1958599090576172, "epoch": 0.6, "learning_rate": 4.56828885400314e-05, "loss": 0.2655, "step": 715, "task_loss": 0.5565665364265442 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2489815652370453, "epoch": 0.6, "learning_rate": 4.567685062190557e-05, "loss": 0.3402, "step": 716, "task_loss": 0.9807544350624084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2521389424800873, "epoch": 0.61, "learning_rate": 4.5670812703779736e-05, "loss": 0.4316, "step": 717, "task_loss": 0.7441264390945435 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.13244979083538055, "epoch": 0.61, "learning_rate": 4.566477478565391e-05, "loss": 0.2782, "step": 718, "task_loss": 0.27505630254745483 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1784110814332962, "epoch": 0.61, "learning_rate": 4.565873686752808e-05, "loss": 0.3064, "step": 719, "task_loss": 0.9517579078674316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48789510130882263, "epoch": 0.61, "learning_rate": 4.565269894940225e-05, "loss": 0.4654, "step": 720, "task_loss": 0.9693741798400879 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48611512780189514, "epoch": 0.61, "learning_rate": 4.564666103127642e-05, "loss": 0.342, "step": 721, "task_loss": 0.9063091278076172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34362560510635376, "epoch": 0.61, "learning_rate": 4.5640623113150586e-05, "loss": 0.2903, "step": 722, "task_loss": 0.5386171936988831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18645596504211426, "epoch": 0.61, "learning_rate": 4.563458519502476e-05, "loss": 0.3807, "step": 723, "task_loss": 0.2823803722858429 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4445580840110779, "epoch": 0.61, "learning_rate": 4.562854727689893e-05, "loss": 0.3792, "step": 724, "task_loss": 0.8198676109313965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5345179438591003, "epoch": 0.61, "learning_rate": 4.5622509358773094e-05, "loss": 0.3687, "step": 725, "task_loss": 1.057568073272705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42510008811950684, "epoch": 0.61, "learning_rate": 4.561647144064727e-05, "loss": 0.3981, "step": 726, "task_loss": 0.5891158580780029 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3632173240184784, "epoch": 0.61, "learning_rate": 4.5610433522521435e-05, "loss": 0.37, "step": 727, "task_loss": 0.48992300033569336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2429322898387909, "epoch": 0.61, "learning_rate": 4.56043956043956e-05, "loss": 0.3699, "step": 728, "task_loss": 0.529766857624054 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23414264619350433, "epoch": 0.62, "learning_rate": 4.5598357686269776e-05, "loss": 0.3312, "step": 729, "task_loss": 0.5960930585861206 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18391002714633942, "epoch": 0.62, "learning_rate": 4.559231976814395e-05, "loss": 0.3333, "step": 730, "task_loss": 0.3421016037464142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26489949226379395, "epoch": 0.62, "learning_rate": 4.558628185001812e-05, "loss": 0.3971, "step": 731, "task_loss": 0.7166442275047302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31561845541000366, "epoch": 0.62, "learning_rate": 4.5580243931892284e-05, "loss": 0.371, "step": 732, "task_loss": 1.1690343618392944 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3441740870475769, "epoch": 0.62, "learning_rate": 4.557420601376646e-05, "loss": 0.4239, "step": 733, "task_loss": 1.1404005289077759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43258577585220337, "epoch": 0.62, "learning_rate": 4.5568168095640626e-05, "loss": 0.3786, "step": 734, "task_loss": 0.6566501259803772 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4298557937145233, "epoch": 0.62, "learning_rate": 4.556213017751479e-05, "loss": 0.4307, "step": 735, "task_loss": 0.4695407450199127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2008676677942276, "epoch": 0.62, "learning_rate": 4.555609225938897e-05, "loss": 0.2574, "step": 736, "task_loss": 0.5518381595611572 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.361763060092926, "epoch": 0.62, "learning_rate": 4.5550054341263134e-05, "loss": 0.3001, "step": 737, "task_loss": 0.1056445986032486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22879339754581451, "epoch": 0.62, "learning_rate": 4.55440164231373e-05, "loss": 0.3551, "step": 738, "task_loss": 0.11415780335664749 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2307289093732834, "epoch": 0.62, "learning_rate": 4.5537978505011475e-05, "loss": 0.3782, "step": 739, "task_loss": 0.7190093398094177 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25920188426971436, "epoch": 0.63, "learning_rate": 4.553194058688565e-05, "loss": 0.2649, "step": 740, "task_loss": 0.8742242455482483 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38082191348075867, "epoch": 0.63, "learning_rate": 4.552590266875981e-05, "loss": 0.406, "step": 741, "task_loss": 0.03482038527727127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3060935437679291, "epoch": 0.63, "learning_rate": 4.5519864750633983e-05, "loss": 0.3288, "step": 742, "task_loss": 0.6914123892784119 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5238396525382996, "epoch": 0.63, "learning_rate": 4.551382683250816e-05, "loss": 0.4776, "step": 743, "task_loss": 1.4795682430267334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27936866879463196, "epoch": 0.63, "learning_rate": 4.550778891438232e-05, "loss": 0.3048, "step": 744, "task_loss": 1.3938137292861938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3178672790527344, "epoch": 0.63, "learning_rate": 4.550175099625649e-05, "loss": 0.3301, "step": 745, "task_loss": 0.37818965315818787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16338595747947693, "epoch": 0.63, "learning_rate": 4.5495713078130666e-05, "loss": 0.3039, "step": 746, "task_loss": 0.9244486093521118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2908826470375061, "epoch": 0.63, "learning_rate": 4.548967516000483e-05, "loss": 0.3619, "step": 747, "task_loss": 0.3686007559299469 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47517821192741394, "epoch": 0.63, "learning_rate": 4.5483637241879e-05, "loss": 0.4491, "step": 748, "task_loss": 0.8115469813346863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35118043422698975, "epoch": 0.63, "learning_rate": 4.5477599323753174e-05, "loss": 0.2838, "step": 749, "task_loss": 0.28180408477783203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3258613646030426, "epoch": 0.63, "learning_rate": 4.547156140562734e-05, "loss": 0.3335, "step": 750, "task_loss": 0.7023788690567017 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4548594057559967, "epoch": 0.63, "learning_rate": 4.546552348750151e-05, "loss": 0.3094, "step": 751, "task_loss": 0.6205072402954102 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22849518060684204, "epoch": 0.64, "learning_rate": 4.545948556937568e-05, "loss": 0.2935, "step": 752, "task_loss": 0.36704325675964355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28524908423423767, "epoch": 0.64, "learning_rate": 4.5453447651249856e-05, "loss": 0.3569, "step": 753, "task_loss": 1.003019094467163 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21455232799053192, "epoch": 0.64, "learning_rate": 4.544740973312402e-05, "loss": 0.2938, "step": 754, "task_loss": 0.5698435306549072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3069485127925873, "epoch": 0.64, "learning_rate": 4.544137181499819e-05, "loss": 0.331, "step": 755, "task_loss": 0.49196818470954895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14176684617996216, "epoch": 0.64, "learning_rate": 4.5435333896872365e-05, "loss": 0.3744, "step": 756, "task_loss": 0.6322250366210938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.293875515460968, "epoch": 0.64, "learning_rate": 4.5429295978746525e-05, "loss": 0.4177, "step": 757, "task_loss": 0.4707200825214386 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2126864194869995, "epoch": 0.64, "learning_rate": 4.54232580606207e-05, "loss": 0.3057, "step": 758, "task_loss": 0.2094651609659195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19990329444408417, "epoch": 0.64, "learning_rate": 4.541722014249487e-05, "loss": 0.3262, "step": 759, "task_loss": 0.012853165157139301 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35965561866760254, "epoch": 0.64, "learning_rate": 4.541118222436904e-05, "loss": 0.3644, "step": 760, "task_loss": 0.5527384281158447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23818881809711456, "epoch": 0.64, "learning_rate": 4.540514430624321e-05, "loss": 0.3757, "step": 761, "task_loss": 0.6005476713180542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35721302032470703, "epoch": 0.64, "learning_rate": 4.539910638811738e-05, "loss": 0.2104, "step": 762, "task_loss": 1.1094653606414795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37343737483024597, "epoch": 0.64, "learning_rate": 4.539306846999155e-05, "loss": 0.4348, "step": 763, "task_loss": 0.21267999708652496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2620397210121155, "epoch": 0.65, "learning_rate": 4.5387030551865716e-05, "loss": 0.3492, "step": 764, "task_loss": 0.2574564218521118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4616929292678833, "epoch": 0.65, "learning_rate": 4.538099263373989e-05, "loss": 0.3337, "step": 765, "task_loss": 0.27643245458602905 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36100324988365173, "epoch": 0.65, "learning_rate": 4.537495471561406e-05, "loss": 0.387, "step": 766, "task_loss": 0.5241384506225586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2915414571762085, "epoch": 0.65, "learning_rate": 4.5368916797488224e-05, "loss": 0.3417, "step": 767, "task_loss": 0.9579669237136841 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24912431836128235, "epoch": 0.65, "learning_rate": 4.53628788793624e-05, "loss": 0.3165, "step": 768, "task_loss": 0.3878670930862427 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.327561616897583, "epoch": 0.65, "learning_rate": 4.535684096123657e-05, "loss": 0.3618, "step": 769, "task_loss": 1.5790338516235352 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37686747312545776, "epoch": 0.65, "learning_rate": 4.535080304311074e-05, "loss": 0.3744, "step": 770, "task_loss": 0.7178024053573608 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3178812861442566, "epoch": 0.65, "learning_rate": 4.5344765124984906e-05, "loss": 0.3728, "step": 771, "task_loss": 0.4720331132411957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28083598613739014, "epoch": 0.65, "learning_rate": 4.533872720685908e-05, "loss": 0.2767, "step": 772, "task_loss": 0.7773228287696838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.262813538312912, "epoch": 0.65, "learning_rate": 4.533268928873325e-05, "loss": 0.3451, "step": 773, "task_loss": 0.23514148592948914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39996352791786194, "epoch": 0.65, "learning_rate": 4.5326651370607415e-05, "loss": 0.2898, "step": 774, "task_loss": 1.3049200773239136 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26791954040527344, "epoch": 0.65, "learning_rate": 4.532061345248159e-05, "loss": 0.2978, "step": 775, "task_loss": 0.9358370304107666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4250520169734955, "epoch": 0.66, "learning_rate": 4.5314575534355756e-05, "loss": 0.2954, "step": 776, "task_loss": 0.6315026879310608 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16271524131298065, "epoch": 0.66, "learning_rate": 4.530853761622992e-05, "loss": 0.2728, "step": 777, "task_loss": 0.18770042061805725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48636695742607117, "epoch": 0.66, "learning_rate": 4.53024996981041e-05, "loss": 0.354, "step": 778, "task_loss": 0.8299375176429749 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47003239393234253, "epoch": 0.66, "learning_rate": 4.5296461779978264e-05, "loss": 0.3491, "step": 779, "task_loss": 0.8216059803962708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31818997859954834, "epoch": 0.66, "learning_rate": 4.529042386185244e-05, "loss": 0.2505, "step": 780, "task_loss": 0.19926393032073975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16001421213150024, "epoch": 0.66, "learning_rate": 4.5284385943726605e-05, "loss": 0.3422, "step": 781, "task_loss": 0.4171951115131378 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34718528389930725, "epoch": 0.66, "learning_rate": 4.527834802560077e-05, "loss": 0.307, "step": 782, "task_loss": 1.167810082435608 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25509899854660034, "epoch": 0.66, "learning_rate": 4.5272310107474946e-05, "loss": 0.2887, "step": 783, "task_loss": 0.15111640095710754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3569784462451935, "epoch": 0.66, "learning_rate": 4.5266272189349114e-05, "loss": 0.4331, "step": 784, "task_loss": 0.3658720850944519 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28241288661956787, "epoch": 0.66, "learning_rate": 4.526023427122328e-05, "loss": 0.3262, "step": 785, "task_loss": 0.8569366335868835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14150623977184296, "epoch": 0.66, "learning_rate": 4.5254196353097455e-05, "loss": 0.3405, "step": 786, "task_loss": 0.08014193922281265 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2618214190006256, "epoch": 0.66, "learning_rate": 4.524815843497162e-05, "loss": 0.3235, "step": 787, "task_loss": 0.38326963782310486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2664749026298523, "epoch": 0.67, "learning_rate": 4.5242120516845796e-05, "loss": 0.3104, "step": 788, "task_loss": 0.3011274039745331 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22440405189990997, "epoch": 0.67, "learning_rate": 4.523608259871996e-05, "loss": 0.291, "step": 789, "task_loss": 0.4468420445919037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2097805142402649, "epoch": 0.67, "learning_rate": 4.523004468059414e-05, "loss": 0.2935, "step": 790, "task_loss": 1.0426483154296875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36187127232551575, "epoch": 0.67, "learning_rate": 4.5224006762468304e-05, "loss": 0.4071, "step": 791, "task_loss": 0.301662415266037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26355308294296265, "epoch": 0.67, "learning_rate": 4.521796884434247e-05, "loss": 0.2925, "step": 792, "task_loss": 1.056702971458435 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2969459295272827, "epoch": 0.67, "learning_rate": 4.5211930926216645e-05, "loss": 0.2929, "step": 793, "task_loss": 0.6666660308837891 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3254457712173462, "epoch": 0.67, "learning_rate": 4.520589300809081e-05, "loss": 0.3673, "step": 794, "task_loss": 0.5708363652229309 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2669367492198944, "epoch": 0.67, "learning_rate": 4.519985508996498e-05, "loss": 0.345, "step": 795, "task_loss": 0.31448039412498474 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22961872816085815, "epoch": 0.67, "learning_rate": 4.5193817171839154e-05, "loss": 0.2509, "step": 796, "task_loss": 0.5035539865493774 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28616762161254883, "epoch": 0.67, "learning_rate": 4.518777925371332e-05, "loss": 0.2989, "step": 797, "task_loss": 0.6960420608520508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4403529763221741, "epoch": 0.67, "learning_rate": 4.518174133558749e-05, "loss": 0.3768, "step": 798, "task_loss": 1.4525697231292725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2504516839981079, "epoch": 0.67, "learning_rate": 4.517570341746166e-05, "loss": 0.4524, "step": 799, "task_loss": 0.5512890219688416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.11189919710159302, "epoch": 0.68, "learning_rate": 4.5169665499335836e-05, "loss": 0.2427, "step": 800, "task_loss": 0.5171324014663696 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16751985251903534, "epoch": 0.68, "learning_rate": 4.5163627581209996e-05, "loss": 0.223, "step": 801, "task_loss": 0.14018379151821136 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28737059235572815, "epoch": 0.68, "learning_rate": 4.515758966308417e-05, "loss": 0.3328, "step": 802, "task_loss": 0.5168519616127014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27396494150161743, "epoch": 0.68, "learning_rate": 4.5151551744958344e-05, "loss": 0.4182, "step": 803, "task_loss": 0.773862361907959 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28407174348831177, "epoch": 0.68, "learning_rate": 4.514551382683251e-05, "loss": 0.2628, "step": 804, "task_loss": 1.213943600654602 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2251850962638855, "epoch": 0.68, "learning_rate": 4.513947590870668e-05, "loss": 0.3167, "step": 805, "task_loss": 0.77642822265625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2771259844303131, "epoch": 0.68, "learning_rate": 4.513343799058085e-05, "loss": 0.3159, "step": 806, "task_loss": 0.37644729018211365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31473296880722046, "epoch": 0.68, "learning_rate": 4.512740007245502e-05, "loss": 0.3031, "step": 807, "task_loss": 0.26792672276496887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46007418632507324, "epoch": 0.68, "learning_rate": 4.512136215432919e-05, "loss": 0.3724, "step": 808, "task_loss": 0.8200263977050781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.502080500125885, "epoch": 0.68, "learning_rate": 4.511532423620336e-05, "loss": 0.2923, "step": 809, "task_loss": 0.9686033129692078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41281658411026, "epoch": 0.68, "learning_rate": 4.5109286318077535e-05, "loss": 0.2906, "step": 810, "task_loss": 0.7908458709716797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.13412374258041382, "epoch": 0.69, "learning_rate": 4.5103248399951695e-05, "loss": 0.3706, "step": 811, "task_loss": 0.007279766723513603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2368299961090088, "epoch": 0.69, "learning_rate": 4.509721048182587e-05, "loss": 0.4693, "step": 812, "task_loss": 0.22541406750679016 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14658771455287933, "epoch": 0.69, "learning_rate": 4.509117256370004e-05, "loss": 0.2438, "step": 813, "task_loss": 0.11188501119613647 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2627505362033844, "epoch": 0.69, "learning_rate": 4.5085134645574204e-05, "loss": 0.312, "step": 814, "task_loss": 0.5142026543617249 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21195872128009796, "epoch": 0.69, "learning_rate": 4.507909672744838e-05, "loss": 0.3962, "step": 815, "task_loss": 0.043154001235961914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1703500896692276, "epoch": 0.69, "learning_rate": 4.507305880932255e-05, "loss": 0.362, "step": 816, "task_loss": 0.25529593229293823 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2474195510149002, "epoch": 0.69, "learning_rate": 4.506702089119671e-05, "loss": 0.272, "step": 817, "task_loss": 0.7813493013381958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4284350275993347, "epoch": 0.69, "learning_rate": 4.5060982973070886e-05, "loss": 0.3527, "step": 818, "task_loss": 0.7589248418807983 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32722094655036926, "epoch": 0.69, "learning_rate": 4.505494505494506e-05, "loss": 0.2798, "step": 819, "task_loss": 0.8149327039718628 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.460521399974823, "epoch": 0.69, "learning_rate": 4.504890713681923e-05, "loss": 0.3711, "step": 820, "task_loss": 0.3695608079433441 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18608522415161133, "epoch": 0.69, "learning_rate": 4.5042869218693394e-05, "loss": 0.2779, "step": 821, "task_loss": 0.5883318185806274 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3069416582584381, "epoch": 0.69, "learning_rate": 4.503683130056757e-05, "loss": 0.3247, "step": 822, "task_loss": 1.1408164501190186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3678193688392639, "epoch": 0.7, "learning_rate": 4.5030793382441735e-05, "loss": 0.3377, "step": 823, "task_loss": 0.29889464378356934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42544400691986084, "epoch": 0.7, "learning_rate": 4.50247554643159e-05, "loss": 0.2954, "step": 824, "task_loss": 0.578364372253418 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19642537832260132, "epoch": 0.7, "learning_rate": 4.5018717546190076e-05, "loss": 0.2223, "step": 825, "task_loss": 0.34300264716148376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2339569628238678, "epoch": 0.7, "learning_rate": 4.501267962806425e-05, "loss": 0.2709, "step": 826, "task_loss": 0.4922509491443634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45247703790664673, "epoch": 0.7, "learning_rate": 4.500664170993841e-05, "loss": 0.3732, "step": 827, "task_loss": 1.0907021760940552 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5683188438415527, "epoch": 0.7, "learning_rate": 4.5000603791812585e-05, "loss": 0.3586, "step": 828, "task_loss": 0.4494231641292572 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.326862633228302, "epoch": 0.7, "learning_rate": 4.499456587368676e-05, "loss": 0.3712, "step": 829, "task_loss": 0.6807838082313538 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38523659110069275, "epoch": 0.7, "learning_rate": 4.4988527955560926e-05, "loss": 0.2747, "step": 830, "task_loss": 0.4024755656719208 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3292922377586365, "epoch": 0.7, "learning_rate": 4.498249003743509e-05, "loss": 0.3534, "step": 831, "task_loss": 1.1196929216384888 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20400089025497437, "epoch": 0.7, "learning_rate": 4.497645211930927e-05, "loss": 0.2745, "step": 832, "task_loss": 0.4570884704589844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29592210054397583, "epoch": 0.7, "learning_rate": 4.4970414201183434e-05, "loss": 0.3936, "step": 833, "task_loss": 1.0568746328353882 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4801669120788574, "epoch": 0.7, "learning_rate": 4.49643762830576e-05, "loss": 0.3581, "step": 834, "task_loss": 0.45878103375434875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15121231973171234, "epoch": 0.71, "learning_rate": 4.4958338364931775e-05, "loss": 0.2983, "step": 835, "task_loss": 0.44923800230026245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2632133364677429, "epoch": 0.71, "learning_rate": 4.495230044680594e-05, "loss": 0.2685, "step": 836, "task_loss": 0.9024428129196167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39137107133865356, "epoch": 0.71, "learning_rate": 4.494626252868011e-05, "loss": 0.3816, "step": 837, "task_loss": 0.6369484663009644 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2737196087837219, "epoch": 0.71, "learning_rate": 4.4940224610554284e-05, "loss": 0.3795, "step": 838, "task_loss": 0.8843077421188354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41984057426452637, "epoch": 0.71, "learning_rate": 4.493418669242845e-05, "loss": 0.3426, "step": 839, "task_loss": 0.2210027128458023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3028753101825714, "epoch": 0.71, "learning_rate": 4.4928148774302625e-05, "loss": 0.3097, "step": 840, "task_loss": 0.9604801535606384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25425541400909424, "epoch": 0.71, "learning_rate": 4.492211085617679e-05, "loss": 0.4051, "step": 841, "task_loss": 0.6949500441551208 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5665708780288696, "epoch": 0.71, "learning_rate": 4.4916072938050966e-05, "loss": 0.467, "step": 842, "task_loss": 0.6010175943374634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35591360926628113, "epoch": 0.71, "learning_rate": 4.491003501992513e-05, "loss": 0.4164, "step": 843, "task_loss": 0.19902320206165314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19289512932300568, "epoch": 0.71, "learning_rate": 4.49039971017993e-05, "loss": 0.2944, "step": 844, "task_loss": 0.05743429809808731 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37886670231819153, "epoch": 0.71, "learning_rate": 4.4897959183673474e-05, "loss": 0.3289, "step": 845, "task_loss": 0.39086824655532837 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.62131267786026, "epoch": 0.71, "learning_rate": 4.489192126554764e-05, "loss": 0.4675, "step": 846, "task_loss": 0.5526881217956543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21981839835643768, "epoch": 0.72, "learning_rate": 4.488588334742181e-05, "loss": 0.2422, "step": 847, "task_loss": 1.249346137046814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23444175720214844, "epoch": 0.72, "learning_rate": 4.487984542929598e-05, "loss": 0.3446, "step": 848, "task_loss": 1.0017846822738647 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5805321335792542, "epoch": 0.72, "learning_rate": 4.487380751117015e-05, "loss": 0.3958, "step": 849, "task_loss": 0.8312202095985413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21968956291675568, "epoch": 0.72, "learning_rate": 4.4867769593044324e-05, "loss": 0.2865, "step": 850, "task_loss": 1.0145660638809204 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34121930599212646, "epoch": 0.72, "learning_rate": 4.486173167491849e-05, "loss": 0.4294, "step": 851, "task_loss": 1.0772480964660645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14664901793003082, "epoch": 0.72, "learning_rate": 4.485569375679266e-05, "loss": 0.271, "step": 852, "task_loss": 0.21183711290359497 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2009272575378418, "epoch": 0.72, "learning_rate": 4.484965583866683e-05, "loss": 0.284, "step": 853, "task_loss": 0.16705362498760223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2767512798309326, "epoch": 0.72, "learning_rate": 4.4843617920541e-05, "loss": 0.232, "step": 854, "task_loss": 0.15811513364315033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35277971625328064, "epoch": 0.72, "learning_rate": 4.4837580002415166e-05, "loss": 0.38, "step": 855, "task_loss": 0.745877206325531 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29185330867767334, "epoch": 0.72, "learning_rate": 4.483154208428934e-05, "loss": 0.2369, "step": 856, "task_loss": 0.110472671687603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4995744228363037, "epoch": 0.72, "learning_rate": 4.482550416616351e-05, "loss": 0.454, "step": 857, "task_loss": 0.5353697538375854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35618507862091064, "epoch": 0.72, "learning_rate": 4.4819466248037675e-05, "loss": 0.4267, "step": 858, "task_loss": 0.8851766586303711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2918955683708191, "epoch": 0.73, "learning_rate": 4.481342832991185e-05, "loss": 0.2734, "step": 859, "task_loss": 1.0571379661560059 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2450220286846161, "epoch": 0.73, "learning_rate": 4.480739041178602e-05, "loss": 0.2618, "step": 860, "task_loss": 0.3525579273700714 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37860098481178284, "epoch": 0.73, "learning_rate": 4.480135249366019e-05, "loss": 0.3613, "step": 861, "task_loss": 0.12457795441150665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33134353160858154, "epoch": 0.73, "learning_rate": 4.479531457553436e-05, "loss": 0.3581, "step": 862, "task_loss": 0.8927803635597229 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4402463436126709, "epoch": 0.73, "learning_rate": 4.478927665740853e-05, "loss": 0.3096, "step": 863, "task_loss": 0.1979515105485916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2574456036090851, "epoch": 0.73, "learning_rate": 4.47832387392827e-05, "loss": 0.2779, "step": 864, "task_loss": 0.3261217176914215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2690635323524475, "epoch": 0.73, "learning_rate": 4.4777200821156865e-05, "loss": 0.3375, "step": 865, "task_loss": 0.9131759405136108 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.12055150419473648, "epoch": 0.73, "learning_rate": 4.477116290303104e-05, "loss": 0.2671, "step": 866, "task_loss": 0.2828481197357178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2493239939212799, "epoch": 0.73, "learning_rate": 4.4765124984905207e-05, "loss": 0.2815, "step": 867, "task_loss": 1.1616557836532593 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2806982100009918, "epoch": 0.73, "learning_rate": 4.4759087066779374e-05, "loss": 0.3573, "step": 868, "task_loss": 0.12370504438877106 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4030117392539978, "epoch": 0.73, "learning_rate": 4.475304914865355e-05, "loss": 0.3586, "step": 869, "task_loss": 0.48109257221221924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19204652309417725, "epoch": 0.73, "learning_rate": 4.474701123052772e-05, "loss": 0.3073, "step": 870, "task_loss": 0.5172537565231323 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17541727423667908, "epoch": 0.74, "learning_rate": 4.474097331240188e-05, "loss": 0.2326, "step": 871, "task_loss": 1.2272226810455322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32953184843063354, "epoch": 0.74, "learning_rate": 4.4734935394276056e-05, "loss": 0.3549, "step": 872, "task_loss": 0.5047439336776733 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1677732914686203, "epoch": 0.74, "learning_rate": 4.472889747615023e-05, "loss": 0.2378, "step": 873, "task_loss": 0.5821611881256104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18654273450374603, "epoch": 0.74, "learning_rate": 4.472285955802439e-05, "loss": 0.2842, "step": 874, "task_loss": 0.48935747146606445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32899102568626404, "epoch": 0.74, "learning_rate": 4.4716821639898564e-05, "loss": 0.3376, "step": 875, "task_loss": 0.49228084087371826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3301312029361725, "epoch": 0.74, "learning_rate": 4.471078372177274e-05, "loss": 0.2932, "step": 876, "task_loss": 0.3888969421386719 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1497659683227539, "epoch": 0.74, "learning_rate": 4.4704745803646905e-05, "loss": 0.2931, "step": 877, "task_loss": 0.6856570243835449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2944922149181366, "epoch": 0.74, "learning_rate": 4.469870788552107e-05, "loss": 0.3272, "step": 878, "task_loss": 0.7669559717178345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3676148056983948, "epoch": 0.74, "learning_rate": 4.469266996739525e-05, "loss": 0.3292, "step": 879, "task_loss": 0.5902640223503113 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5524145364761353, "epoch": 0.74, "learning_rate": 4.4686632049269414e-05, "loss": 0.4101, "step": 880, "task_loss": 0.774904191493988 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2837039828300476, "epoch": 0.74, "learning_rate": 4.468059413114358e-05, "loss": 0.2468, "step": 881, "task_loss": 0.2565593719482422 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32959577441215515, "epoch": 0.75, "learning_rate": 4.4674556213017755e-05, "loss": 0.3325, "step": 882, "task_loss": 0.310451865196228 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20071756839752197, "epoch": 0.75, "learning_rate": 4.466851829489193e-05, "loss": 0.2603, "step": 883, "task_loss": 0.6266161799430847 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24892960488796234, "epoch": 0.75, "learning_rate": 4.466248037676609e-05, "loss": 0.2713, "step": 884, "task_loss": 0.2011856883764267 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28083887696266174, "epoch": 0.75, "learning_rate": 4.465644245864026e-05, "loss": 0.2966, "step": 885, "task_loss": 1.5609312057495117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32567310333251953, "epoch": 0.75, "learning_rate": 4.465040454051444e-05, "loss": 0.2643, "step": 886, "task_loss": 0.7791790962219238 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3462940454483032, "epoch": 0.75, "learning_rate": 4.46443666223886e-05, "loss": 0.3486, "step": 887, "task_loss": 0.4980553090572357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.281302273273468, "epoch": 0.75, "learning_rate": 4.463832870426277e-05, "loss": 0.2757, "step": 888, "task_loss": 0.5321588516235352 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20592188835144043, "epoch": 0.75, "learning_rate": 4.4632290786136946e-05, "loss": 0.3053, "step": 889, "task_loss": 0.5561092495918274 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23646581172943115, "epoch": 0.75, "learning_rate": 4.4626252868011106e-05, "loss": 0.3491, "step": 890, "task_loss": 0.41851186752319336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36811643838882446, "epoch": 0.75, "learning_rate": 4.462021494988528e-05, "loss": 0.3576, "step": 891, "task_loss": 1.5587043762207031 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20927318930625916, "epoch": 0.75, "learning_rate": 4.4614177031759454e-05, "loss": 0.2822, "step": 892, "task_loss": 0.7147969603538513 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.251226544380188, "epoch": 0.75, "learning_rate": 4.460813911363362e-05, "loss": 0.4231, "step": 893, "task_loss": 0.366676390171051 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2869289815425873, "epoch": 0.76, "learning_rate": 4.460210119550779e-05, "loss": 0.3511, "step": 894, "task_loss": 1.2641568183898926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39927786588668823, "epoch": 0.76, "learning_rate": 4.459606327738196e-05, "loss": 0.3323, "step": 895, "task_loss": 0.31022870540618896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2870728075504303, "epoch": 0.76, "learning_rate": 4.459002535925613e-05, "loss": 0.3267, "step": 896, "task_loss": 0.33017101883888245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4244043529033661, "epoch": 0.76, "learning_rate": 4.4583987441130297e-05, "loss": 0.3144, "step": 897, "task_loss": 1.3175626993179321 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30392491817474365, "epoch": 0.76, "learning_rate": 4.457794952300447e-05, "loss": 0.3014, "step": 898, "task_loss": 0.1628706455230713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39524781703948975, "epoch": 0.76, "learning_rate": 4.4571911604878644e-05, "loss": 0.3135, "step": 899, "task_loss": 1.1435112953186035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3535827696323395, "epoch": 0.76, "learning_rate": 4.4565873686752805e-05, "loss": 0.3678, "step": 900, "task_loss": 0.330600768327713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2594134211540222, "epoch": 0.76, "learning_rate": 4.455983576862698e-05, "loss": 0.3456, "step": 901, "task_loss": 0.9072926044464111 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3696218430995941, "epoch": 0.76, "learning_rate": 4.455379785050115e-05, "loss": 0.2795, "step": 902, "task_loss": 0.48372340202331543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23877239227294922, "epoch": 0.76, "learning_rate": 4.454775993237532e-05, "loss": 0.2779, "step": 903, "task_loss": 0.6393103003501892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32777437567710876, "epoch": 0.76, "learning_rate": 4.454172201424949e-05, "loss": 0.3416, "step": 904, "task_loss": 0.8551406264305115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4088174104690552, "epoch": 0.76, "learning_rate": 4.453568409612366e-05, "loss": 0.2775, "step": 905, "task_loss": 0.8377326130867004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2635897696018219, "epoch": 0.77, "learning_rate": 4.452964617799783e-05, "loss": 0.4136, "step": 906, "task_loss": 1.1125253438949585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5768491625785828, "epoch": 0.77, "learning_rate": 4.4523608259871996e-05, "loss": 0.4635, "step": 907, "task_loss": 0.6605405807495117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4529709815979004, "epoch": 0.77, "learning_rate": 4.451757034174617e-05, "loss": 0.3997, "step": 908, "task_loss": 1.8350812196731567 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40680643916130066, "epoch": 0.77, "learning_rate": 4.451153242362034e-05, "loss": 0.4538, "step": 909, "task_loss": 0.7768993377685547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.231535866856575, "epoch": 0.77, "learning_rate": 4.4505494505494504e-05, "loss": 0.3273, "step": 910, "task_loss": 0.37125688791275024 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2575525641441345, "epoch": 0.77, "learning_rate": 4.449945658736868e-05, "loss": 0.304, "step": 911, "task_loss": 0.29679203033447266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33841806650161743, "epoch": 0.77, "learning_rate": 4.4493418669242845e-05, "loss": 0.3838, "step": 912, "task_loss": 0.5904703736305237 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.318535178899765, "epoch": 0.77, "learning_rate": 4.448738075111702e-05, "loss": 0.3426, "step": 913, "task_loss": 1.268672227859497 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22811497747898102, "epoch": 0.77, "learning_rate": 4.4481342832991186e-05, "loss": 0.2945, "step": 914, "task_loss": 1.0064010620117188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2658228874206543, "epoch": 0.77, "learning_rate": 4.447530491486535e-05, "loss": 0.2501, "step": 915, "task_loss": 0.9221341013908386 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.271209180355072, "epoch": 0.77, "learning_rate": 4.446926699673953e-05, "loss": 0.2908, "step": 916, "task_loss": 0.6219085454940796 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39445099234580994, "epoch": 0.77, "learning_rate": 4.4463229078613694e-05, "loss": 0.3741, "step": 917, "task_loss": 1.3790051937103271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2763742506504059, "epoch": 0.78, "learning_rate": 4.445719116048787e-05, "loss": 0.3558, "step": 918, "task_loss": 0.491455614566803 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.343190997838974, "epoch": 0.78, "learning_rate": 4.4451153242362036e-05, "loss": 0.3307, "step": 919, "task_loss": 1.0849103927612305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.657841145992279, "epoch": 0.78, "learning_rate": 4.44451153242362e-05, "loss": 0.3528, "step": 920, "task_loss": 1.3138108253479004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20869892835617065, "epoch": 0.78, "learning_rate": 4.443907740611038e-05, "loss": 0.2682, "step": 921, "task_loss": 0.6371769905090332 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17357312142848969, "epoch": 0.78, "learning_rate": 4.4433039487984544e-05, "loss": 0.2564, "step": 922, "task_loss": 0.33497363328933716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2257077693939209, "epoch": 0.78, "learning_rate": 4.442700156985872e-05, "loss": 0.2882, "step": 923, "task_loss": 0.9911162853240967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2122732698917389, "epoch": 0.78, "learning_rate": 4.4420963651732885e-05, "loss": 0.3909, "step": 924, "task_loss": 1.3664565086364746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38081780076026917, "epoch": 0.78, "learning_rate": 4.441492573360705e-05, "loss": 0.4524, "step": 925, "task_loss": 1.2730913162231445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20022636651992798, "epoch": 0.78, "learning_rate": 4.4408887815481226e-05, "loss": 0.286, "step": 926, "task_loss": 0.5896009206771851 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3466823399066925, "epoch": 0.78, "learning_rate": 4.440284989735539e-05, "loss": 0.1977, "step": 927, "task_loss": 0.11348257958889008 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3495258688926697, "epoch": 0.78, "learning_rate": 4.439681197922956e-05, "loss": 0.4108, "step": 928, "task_loss": 0.1774165779352188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6170074939727783, "epoch": 0.78, "learning_rate": 4.4390774061103735e-05, "loss": 0.446, "step": 929, "task_loss": 0.8056990504264832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38557907938957214, "epoch": 0.79, "learning_rate": 4.43847361429779e-05, "loss": 0.461, "step": 930, "task_loss": 1.6459150314331055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26434993743896484, "epoch": 0.79, "learning_rate": 4.437869822485207e-05, "loss": 0.3443, "step": 931, "task_loss": 0.7516822814941406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16699549555778503, "epoch": 0.79, "learning_rate": 4.437266030672624e-05, "loss": 0.2126, "step": 932, "task_loss": 0.14846821129322052 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.525314211845398, "epoch": 0.79, "learning_rate": 4.436662238860042e-05, "loss": 0.3949, "step": 933, "task_loss": 1.7494056224822998 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33138734102249146, "epoch": 0.79, "learning_rate": 4.4360584470474584e-05, "loss": 0.333, "step": 934, "task_loss": 0.4427955746650696 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.375948429107666, "epoch": 0.79, "learning_rate": 4.435454655234875e-05, "loss": 0.3911, "step": 935, "task_loss": 0.29812103509902954 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2076297551393509, "epoch": 0.79, "learning_rate": 4.4348508634222925e-05, "loss": 0.352, "step": 936, "task_loss": 0.14911505579948425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.12529835104942322, "epoch": 0.79, "learning_rate": 4.434247071609709e-05, "loss": 0.3564, "step": 937, "task_loss": 0.21458204090595245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5059237480163574, "epoch": 0.79, "learning_rate": 4.433643279797126e-05, "loss": 0.4174, "step": 938, "task_loss": 0.785790205001831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19054540991783142, "epoch": 0.79, "learning_rate": 4.4330394879845433e-05, "loss": 0.3267, "step": 939, "task_loss": 0.28141486644744873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3649073541164398, "epoch": 0.79, "learning_rate": 4.43243569617196e-05, "loss": 0.4151, "step": 940, "task_loss": 1.3084403276443481 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2746647596359253, "epoch": 0.79, "learning_rate": 4.431831904359377e-05, "loss": 0.2759, "step": 941, "task_loss": 0.7171421051025391 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20765218138694763, "epoch": 0.8, "learning_rate": 4.431228112546794e-05, "loss": 0.2944, "step": 942, "task_loss": 0.4240565598011017 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3797289729118347, "epoch": 0.8, "learning_rate": 4.4306243207342116e-05, "loss": 0.2999, "step": 943, "task_loss": 0.24071669578552246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4252301752567291, "epoch": 0.8, "learning_rate": 4.4300205289216276e-05, "loss": 0.3972, "step": 944, "task_loss": 0.5524641871452332 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2376905381679535, "epoch": 0.8, "learning_rate": 4.429416737109045e-05, "loss": 0.3452, "step": 945, "task_loss": 0.6807569861412048 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3007066547870636, "epoch": 0.8, "learning_rate": 4.4288129452964624e-05, "loss": 0.2842, "step": 946, "task_loss": 0.48575809597969055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25682830810546875, "epoch": 0.8, "learning_rate": 4.4282091534838784e-05, "loss": 0.2786, "step": 947, "task_loss": 0.38475286960601807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23255904018878937, "epoch": 0.8, "learning_rate": 4.427605361671296e-05, "loss": 0.3274, "step": 948, "task_loss": 1.3730823993682861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24587060511112213, "epoch": 0.8, "learning_rate": 4.427001569858713e-05, "loss": 0.3018, "step": 949, "task_loss": 0.5464277267456055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18896152079105377, "epoch": 0.8, "learning_rate": 4.42639777804613e-05, "loss": 0.2543, "step": 950, "task_loss": 0.6618168354034424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2069229781627655, "epoch": 0.8, "learning_rate": 4.425793986233547e-05, "loss": 0.3133, "step": 951, "task_loss": 0.4751676619052887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30832594633102417, "epoch": 0.8, "learning_rate": 4.425190194420964e-05, "loss": 0.3056, "step": 952, "task_loss": 0.39860960841178894 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15175822377204895, "epoch": 0.81, "learning_rate": 4.424586402608381e-05, "loss": 0.3169, "step": 953, "task_loss": 0.43376606702804565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31668588519096375, "epoch": 0.81, "learning_rate": 4.4239826107957975e-05, "loss": 0.3419, "step": 954, "task_loss": 0.9416754245758057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3224894404411316, "epoch": 0.81, "learning_rate": 4.423378818983215e-05, "loss": 0.3548, "step": 955, "task_loss": 0.41079917550086975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1207728236913681, "epoch": 0.81, "learning_rate": 4.422775027170632e-05, "loss": 0.2373, "step": 956, "task_loss": 0.018194148316979408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19165289402008057, "epoch": 0.81, "learning_rate": 4.4221712353580483e-05, "loss": 0.2904, "step": 957, "task_loss": 0.7561900019645691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47163888812065125, "epoch": 0.81, "learning_rate": 4.421567443545466e-05, "loss": 0.3724, "step": 958, "task_loss": 0.6099743247032166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23408189415931702, "epoch": 0.81, "learning_rate": 4.420963651732883e-05, "loss": 0.3644, "step": 959, "task_loss": 0.35257381200790405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22687366604804993, "epoch": 0.81, "learning_rate": 4.420359859920299e-05, "loss": 0.3177, "step": 960, "task_loss": 0.331930011510849 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2910210192203522, "epoch": 0.81, "learning_rate": 4.4197560681077166e-05, "loss": 0.3103, "step": 961, "task_loss": 0.04620639234781265 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3682945966720581, "epoch": 0.81, "learning_rate": 4.419152276295134e-05, "loss": 0.3143, "step": 962, "task_loss": 0.4286225140094757 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2485480010509491, "epoch": 0.81, "learning_rate": 4.418548484482551e-05, "loss": 0.3692, "step": 963, "task_loss": 1.0280723571777344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1769065111875534, "epoch": 0.81, "learning_rate": 4.4179446926699674e-05, "loss": 0.285, "step": 964, "task_loss": 0.13104857504367828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24676114320755005, "epoch": 0.82, "learning_rate": 4.417340900857385e-05, "loss": 0.3134, "step": 965, "task_loss": 0.5934781432151794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5543673634529114, "epoch": 0.82, "learning_rate": 4.4167371090448015e-05, "loss": 0.3826, "step": 966, "task_loss": 1.803892970085144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17848938703536987, "epoch": 0.82, "learning_rate": 4.416133317232218e-05, "loss": 0.296, "step": 967, "task_loss": 0.4514574110507965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17026172578334808, "epoch": 0.82, "learning_rate": 4.4155295254196356e-05, "loss": 0.2717, "step": 968, "task_loss": 0.5312502980232239 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29398059844970703, "epoch": 0.82, "learning_rate": 4.4149257336070523e-05, "loss": 0.2934, "step": 969, "task_loss": 1.191516637802124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19883738458156586, "epoch": 0.82, "learning_rate": 4.414321941794469e-05, "loss": 0.2513, "step": 970, "task_loss": 0.42799901962280273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3953816890716553, "epoch": 0.82, "learning_rate": 4.4137181499818865e-05, "loss": 0.396, "step": 971, "task_loss": 0.6207815408706665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3231390714645386, "epoch": 0.82, "learning_rate": 4.413114358169304e-05, "loss": 0.3073, "step": 972, "task_loss": 0.3937745988368988 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5208315849304199, "epoch": 0.82, "learning_rate": 4.4125105663567206e-05, "loss": 0.3432, "step": 973, "task_loss": 0.5548078417778015 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19812807440757751, "epoch": 0.82, "learning_rate": 4.411906774544137e-05, "loss": 0.3291, "step": 974, "task_loss": 0.20434805750846863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3594655692577362, "epoch": 0.82, "learning_rate": 4.411302982731555e-05, "loss": 0.2766, "step": 975, "task_loss": 0.3989031910896301 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1778561770915985, "epoch": 0.82, "learning_rate": 4.4106991909189714e-05, "loss": 0.3613, "step": 976, "task_loss": 0.5817642211914062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15779995918273926, "epoch": 0.83, "learning_rate": 4.410095399106388e-05, "loss": 0.2766, "step": 977, "task_loss": 1.4115668535232544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21007570624351501, "epoch": 0.83, "learning_rate": 4.4094916072938055e-05, "loss": 0.2318, "step": 978, "task_loss": 0.672278881072998 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22853948175907135, "epoch": 0.83, "learning_rate": 4.408887815481222e-05, "loss": 0.3024, "step": 979, "task_loss": 0.2280975729227066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2639276385307312, "epoch": 0.83, "learning_rate": 4.408284023668639e-05, "loss": 0.2811, "step": 980, "task_loss": 1.1780911684036255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2992544174194336, "epoch": 0.83, "learning_rate": 4.4076802318560564e-05, "loss": 0.3236, "step": 981, "task_loss": 1.5156692266464233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4555066227912903, "epoch": 0.83, "learning_rate": 4.407076440043473e-05, "loss": 0.332, "step": 982, "task_loss": 0.32221606373786926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26421698927879333, "epoch": 0.83, "learning_rate": 4.4064726482308905e-05, "loss": 0.2934, "step": 983, "task_loss": 0.4709763526916504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18341732025146484, "epoch": 0.83, "learning_rate": 4.405868856418307e-05, "loss": 0.4569, "step": 984, "task_loss": 0.7482260465621948 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4706900119781494, "epoch": 0.83, "learning_rate": 4.405265064605724e-05, "loss": 0.381, "step": 985, "task_loss": 1.4191635847091675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28539741039276123, "epoch": 0.83, "learning_rate": 4.404661272793141e-05, "loss": 0.3199, "step": 986, "task_loss": 1.2112116813659668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4704607427120209, "epoch": 0.83, "learning_rate": 4.404057480980558e-05, "loss": 0.3794, "step": 987, "task_loss": 0.8687427043914795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3524262309074402, "epoch": 0.83, "learning_rate": 4.403453689167975e-05, "loss": 0.3255, "step": 988, "task_loss": 0.5620470643043518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38292771577835083, "epoch": 0.84, "learning_rate": 4.402849897355392e-05, "loss": 0.3439, "step": 989, "task_loss": 1.002793788909912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3935520052909851, "epoch": 0.84, "learning_rate": 4.402246105542809e-05, "loss": 0.292, "step": 990, "task_loss": 0.8700146675109863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19333480298519135, "epoch": 0.84, "learning_rate": 4.401642313730226e-05, "loss": 0.2459, "step": 991, "task_loss": 0.6943432092666626 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2051442563533783, "epoch": 0.84, "learning_rate": 4.401038521917643e-05, "loss": 0.2665, "step": 992, "task_loss": 0.925590455532074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45900315046310425, "epoch": 0.84, "learning_rate": 4.4004347301050604e-05, "loss": 0.3111, "step": 993, "task_loss": 0.20756827294826508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34722113609313965, "epoch": 0.84, "learning_rate": 4.399830938292477e-05, "loss": 0.3075, "step": 994, "task_loss": 1.1074647903442383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27422356605529785, "epoch": 0.84, "learning_rate": 4.399227146479894e-05, "loss": 0.333, "step": 995, "task_loss": 0.36270856857299805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2909950911998749, "epoch": 0.84, "learning_rate": 4.398623354667311e-05, "loss": 0.2856, "step": 996, "task_loss": 0.4288698732852936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3258090615272522, "epoch": 0.84, "learning_rate": 4.398019562854728e-05, "loss": 0.3713, "step": 997, "task_loss": 0.8326727747917175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25224539637565613, "epoch": 0.84, "learning_rate": 4.3974157710421446e-05, "loss": 0.2836, "step": 998, "task_loss": 1.4559462070465088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4141561686992645, "epoch": 0.84, "learning_rate": 4.396811979229562e-05, "loss": 0.3515, "step": 999, "task_loss": 1.1505557298660278 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35155683755874634, "epoch": 0.84, "learning_rate": 4.396208187416979e-05, "loss": 0.2891, "step": 1000, "task_loss": 0.46846193075180054 }, { "epoch": 0.84, "eval_accuracy": 0.9137029702970297, "eval_loss": 0.19122548401355743, "eval_runtime": 329.2855, "eval_samples_per_second": 76.681, "eval_steps_per_second": 0.601, "step": 1000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3391488194465637, "epoch": 0.85, "learning_rate": 4.3956043956043955e-05, "loss": 0.3169, "step": 1001, "task_loss": 0.45803603529930115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22156934440135956, "epoch": 0.85, "learning_rate": 4.395000603791813e-05, "loss": 0.2767, "step": 1002, "task_loss": 0.5238223671913147 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20765940845012665, "epoch": 0.85, "learning_rate": 4.39439681197923e-05, "loss": 0.3599, "step": 1003, "task_loss": 0.29685527086257935 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32182827591896057, "epoch": 0.85, "learning_rate": 4.393793020166646e-05, "loss": 0.3594, "step": 1004, "task_loss": 0.6330575942993164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.054391622543335, "epoch": 0.85, "learning_rate": 4.393189228354064e-05, "loss": 0.4714, "step": 1005, "task_loss": 1.5936485528945923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5592337846755981, "epoch": 0.85, "learning_rate": 4.392585436541481e-05, "loss": 0.3528, "step": 1006, "task_loss": 0.09659402817487717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20363977551460266, "epoch": 0.85, "learning_rate": 4.391981644728898e-05, "loss": 0.2805, "step": 1007, "task_loss": 0.3443560004234314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3813174366950989, "epoch": 0.85, "learning_rate": 4.3913778529163145e-05, "loss": 0.3873, "step": 1008, "task_loss": 0.63079833984375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3403828740119934, "epoch": 0.85, "learning_rate": 4.390774061103732e-05, "loss": 0.3214, "step": 1009, "task_loss": 1.059286117553711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34724146127700806, "epoch": 0.85, "learning_rate": 4.3901702692911486e-05, "loss": 0.4536, "step": 1010, "task_loss": 1.3546801805496216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4455212950706482, "epoch": 0.85, "learning_rate": 4.3895664774785654e-05, "loss": 0.4005, "step": 1011, "task_loss": 1.0417988300323486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2075812816619873, "epoch": 0.85, "learning_rate": 4.388962685665983e-05, "loss": 0.2975, "step": 1012, "task_loss": 0.1890903115272522 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1141534298658371, "epoch": 0.86, "learning_rate": 4.3883588938534e-05, "loss": 0.2382, "step": 1013, "task_loss": 0.15308374166488647 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19643740355968475, "epoch": 0.86, "learning_rate": 4.387755102040816e-05, "loss": 0.2969, "step": 1014, "task_loss": 0.21655401587486267 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14894647896289825, "epoch": 0.86, "learning_rate": 4.3871513102282336e-05, "loss": 0.279, "step": 1015, "task_loss": 0.9188229441642761 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34093791246414185, "epoch": 0.86, "learning_rate": 4.386547518415651e-05, "loss": 0.4014, "step": 1016, "task_loss": 0.5148928165435791 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17937791347503662, "epoch": 0.86, "learning_rate": 4.385943726603067e-05, "loss": 0.3511, "step": 1017, "task_loss": 0.07543644309043884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3252143859863281, "epoch": 0.86, "learning_rate": 4.3853399347904844e-05, "loss": 0.3454, "step": 1018, "task_loss": 0.8916002511978149 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3843550682067871, "epoch": 0.86, "learning_rate": 4.384736142977902e-05, "loss": 0.4296, "step": 1019, "task_loss": 0.3139152526855469 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18885429203510284, "epoch": 0.86, "learning_rate": 4.384132351165318e-05, "loss": 0.2847, "step": 1020, "task_loss": 0.8368679285049438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16177509725093842, "epoch": 0.86, "learning_rate": 4.383528559352735e-05, "loss": 0.3119, "step": 1021, "task_loss": 0.16134583950042725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.10936792194843292, "epoch": 0.86, "learning_rate": 4.3829247675401526e-05, "loss": 0.2461, "step": 1022, "task_loss": 0.006842827424407005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3140726685523987, "epoch": 0.86, "learning_rate": 4.3823209757275694e-05, "loss": 0.2709, "step": 1023, "task_loss": 0.8564401865005493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2307685762643814, "epoch": 0.87, "learning_rate": 4.381717183914986e-05, "loss": 0.2613, "step": 1024, "task_loss": 0.2400742769241333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14686830341815948, "epoch": 0.87, "learning_rate": 4.3811133921024035e-05, "loss": 0.2809, "step": 1025, "task_loss": 0.6212365031242371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36377811431884766, "epoch": 0.87, "learning_rate": 4.38050960028982e-05, "loss": 0.3472, "step": 1026, "task_loss": 0.21897917985916138 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3279937505722046, "epoch": 0.87, "learning_rate": 4.379905808477237e-05, "loss": 0.314, "step": 1027, "task_loss": 0.3705894351005554 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.441039502620697, "epoch": 0.87, "learning_rate": 4.379302016664654e-05, "loss": 0.4532, "step": 1028, "task_loss": 0.6643653512001038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21006743609905243, "epoch": 0.87, "learning_rate": 4.378698224852072e-05, "loss": 0.3213, "step": 1029, "task_loss": 0.6860018968582153 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49603691697120667, "epoch": 0.87, "learning_rate": 4.378094433039488e-05, "loss": 0.4618, "step": 1030, "task_loss": 0.8266454339027405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22908753156661987, "epoch": 0.87, "learning_rate": 4.377490641226905e-05, "loss": 0.3503, "step": 1031, "task_loss": 0.3216218948364258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5637189745903015, "epoch": 0.87, "learning_rate": 4.3768868494143225e-05, "loss": 0.3615, "step": 1032, "task_loss": 0.16365796327590942 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2606871724128723, "epoch": 0.87, "learning_rate": 4.376283057601739e-05, "loss": 0.4141, "step": 1033, "task_loss": 0.2674388885498047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4614213705062866, "epoch": 0.87, "learning_rate": 4.375679265789156e-05, "loss": 0.3725, "step": 1034, "task_loss": 0.7040387392044067 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21910136938095093, "epoch": 0.87, "learning_rate": 4.3750754739765734e-05, "loss": 0.2722, "step": 1035, "task_loss": 0.6325879096984863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2930923104286194, "epoch": 0.88, "learning_rate": 4.37447168216399e-05, "loss": 0.3799, "step": 1036, "task_loss": 0.3767751157283783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30730485916137695, "epoch": 0.88, "learning_rate": 4.373867890351407e-05, "loss": 0.4391, "step": 1037, "task_loss": 1.375309944152832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1575387418270111, "epoch": 0.88, "learning_rate": 4.373264098538824e-05, "loss": 0.2939, "step": 1038, "task_loss": 0.17976133525371552 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3143768310546875, "epoch": 0.88, "learning_rate": 4.372660306726241e-05, "loss": 0.3635, "step": 1039, "task_loss": 0.6070629358291626 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2287972867488861, "epoch": 0.88, "learning_rate": 4.3720565149136576e-05, "loss": 0.4331, "step": 1040, "task_loss": 1.0047622919082642 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22861576080322266, "epoch": 0.88, "learning_rate": 4.371452723101075e-05, "loss": 0.3668, "step": 1041, "task_loss": 0.34429532289505005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5333127379417419, "epoch": 0.88, "learning_rate": 4.370848931288492e-05, "loss": 0.3212, "step": 1042, "task_loss": 1.0081291198730469 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2574305534362793, "epoch": 0.88, "learning_rate": 4.370245139475909e-05, "loss": 0.3002, "step": 1043, "task_loss": 0.3854750990867615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23126491904258728, "epoch": 0.88, "learning_rate": 4.369641347663326e-05, "loss": 0.3038, "step": 1044, "task_loss": 0.5102897882461548 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21459677815437317, "epoch": 0.88, "learning_rate": 4.3690375558507426e-05, "loss": 0.3225, "step": 1045, "task_loss": 0.6645446419715881 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43234217166900635, "epoch": 0.88, "learning_rate": 4.36843376403816e-05, "loss": 0.3672, "step": 1046, "task_loss": 0.8873173594474792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2908895015716553, "epoch": 0.88, "learning_rate": 4.367829972225577e-05, "loss": 0.3714, "step": 1047, "task_loss": 0.06387089937925339 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2915533781051636, "epoch": 0.89, "learning_rate": 4.367226180412994e-05, "loss": 0.4101, "step": 1048, "task_loss": 0.40828245878219604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1827658861875534, "epoch": 0.89, "learning_rate": 4.366622388600411e-05, "loss": 0.3001, "step": 1049, "task_loss": 0.4963056445121765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2755945920944214, "epoch": 0.89, "learning_rate": 4.3660185967878275e-05, "loss": 0.3099, "step": 1050, "task_loss": 0.6600726842880249 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2445630431175232, "epoch": 0.89, "learning_rate": 4.365414804975245e-05, "loss": 0.3186, "step": 1051, "task_loss": 0.46036696434020996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21081236004829407, "epoch": 0.89, "learning_rate": 4.3648110131626617e-05, "loss": 0.2855, "step": 1052, "task_loss": 0.09793613106012344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47108274698257446, "epoch": 0.89, "learning_rate": 4.364207221350079e-05, "loss": 0.3796, "step": 1053, "task_loss": 0.906611979007721 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41064777970314026, "epoch": 0.89, "learning_rate": 4.363603429537496e-05, "loss": 0.3341, "step": 1054, "task_loss": 0.9916089177131653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1975703239440918, "epoch": 0.89, "learning_rate": 4.3629996377249125e-05, "loss": 0.2804, "step": 1055, "task_loss": 0.0818234458565712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2746395170688629, "epoch": 0.89, "learning_rate": 4.36239584591233e-05, "loss": 0.2907, "step": 1056, "task_loss": 0.8361456394195557 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6924027800559998, "epoch": 0.89, "learning_rate": 4.3617920540997466e-05, "loss": 0.3682, "step": 1057, "task_loss": 0.5733320116996765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34806808829307556, "epoch": 0.89, "learning_rate": 4.361188262287163e-05, "loss": 0.3427, "step": 1058, "task_loss": 0.986325740814209 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17661121487617493, "epoch": 0.89, "learning_rate": 4.360584470474581e-05, "loss": 0.3588, "step": 1059, "task_loss": 0.19459593296051025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3229772448539734, "epoch": 0.9, "learning_rate": 4.3599806786619974e-05, "loss": 0.4412, "step": 1060, "task_loss": 1.3445671796798706 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4605786204338074, "epoch": 0.9, "learning_rate": 4.359376886849414e-05, "loss": 0.3646, "step": 1061, "task_loss": 0.7669268846511841 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4128333330154419, "epoch": 0.9, "learning_rate": 4.3587730950368315e-05, "loss": 0.359, "step": 1062, "task_loss": 0.7865899205207825 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3837454617023468, "epoch": 0.9, "learning_rate": 4.358169303224248e-05, "loss": 0.3225, "step": 1063, "task_loss": 0.32757288217544556 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5358129739761353, "epoch": 0.9, "learning_rate": 4.3575655114116657e-05, "loss": 0.4363, "step": 1064, "task_loss": 1.422524094581604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2206120789051056, "epoch": 0.9, "learning_rate": 4.3569617195990824e-05, "loss": 0.4008, "step": 1065, "task_loss": 0.6304378509521484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.622882068157196, "epoch": 0.9, "learning_rate": 4.3563579277865e-05, "loss": 0.5577, "step": 1066, "task_loss": 0.27279984951019287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35056620836257935, "epoch": 0.9, "learning_rate": 4.3557541359739165e-05, "loss": 0.3079, "step": 1067, "task_loss": 0.37541842460632324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3889786899089813, "epoch": 0.9, "learning_rate": 4.355150344161333e-05, "loss": 0.3506, "step": 1068, "task_loss": 0.9460868835449219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39257383346557617, "epoch": 0.9, "learning_rate": 4.3545465523487506e-05, "loss": 0.3503, "step": 1069, "task_loss": 0.52589350938797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2780054211616516, "epoch": 0.9, "learning_rate": 4.353942760536167e-05, "loss": 0.3402, "step": 1070, "task_loss": 0.6463347673416138 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2569604814052582, "epoch": 0.9, "learning_rate": 4.353338968723584e-05, "loss": 0.2757, "step": 1071, "task_loss": 0.34818586707115173 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.406528502702713, "epoch": 0.91, "learning_rate": 4.3527351769110014e-05, "loss": 0.346, "step": 1072, "task_loss": 1.1724082231521606 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.262786865234375, "epoch": 0.91, "learning_rate": 4.352131385098418e-05, "loss": 0.2715, "step": 1073, "task_loss": 0.20159520208835602 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5527819395065308, "epoch": 0.91, "learning_rate": 4.351527593285835e-05, "loss": 0.3263, "step": 1074, "task_loss": 0.39478641748428345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34328949451446533, "epoch": 0.91, "learning_rate": 4.350923801473252e-05, "loss": 0.3011, "step": 1075, "task_loss": 0.4349038004875183 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2689337730407715, "epoch": 0.91, "learning_rate": 4.35032000966067e-05, "loss": 0.4245, "step": 1076, "task_loss": 0.3119881749153137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2165769338607788, "epoch": 0.91, "learning_rate": 4.349716217848086e-05, "loss": 0.3833, "step": 1077, "task_loss": 1.0348856449127197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20159922540187836, "epoch": 0.91, "learning_rate": 4.349112426035503e-05, "loss": 0.26, "step": 1078, "task_loss": 0.6820428371429443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2304922342300415, "epoch": 0.91, "learning_rate": 4.3485086342229205e-05, "loss": 0.297, "step": 1079, "task_loss": 0.32499027252197266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37198036909103394, "epoch": 0.91, "learning_rate": 4.347904842410337e-05, "loss": 0.3628, "step": 1080, "task_loss": 0.623182475566864 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.580514132976532, "epoch": 0.91, "learning_rate": 4.347301050597754e-05, "loss": 0.4854, "step": 1081, "task_loss": 1.3708230257034302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3139883577823639, "epoch": 0.91, "learning_rate": 4.346697258785171e-05, "loss": 0.3263, "step": 1082, "task_loss": 0.9046801924705505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24799828231334686, "epoch": 0.91, "learning_rate": 4.346093466972588e-05, "loss": 0.2828, "step": 1083, "task_loss": 0.1934831291437149 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2548254430294037, "epoch": 0.92, "learning_rate": 4.345489675160005e-05, "loss": 0.3656, "step": 1084, "task_loss": 0.32228773832321167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20484544336795807, "epoch": 0.92, "learning_rate": 4.344885883347422e-05, "loss": 0.2534, "step": 1085, "task_loss": 0.643466055393219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22415760159492493, "epoch": 0.92, "learning_rate": 4.3442820915348396e-05, "loss": 0.2832, "step": 1086, "task_loss": 0.6243515610694885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4137108623981476, "epoch": 0.92, "learning_rate": 4.3436782997222556e-05, "loss": 0.3559, "step": 1087, "task_loss": 0.5403769016265869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.12368832528591156, "epoch": 0.92, "learning_rate": 4.343074507909673e-05, "loss": 0.3114, "step": 1088, "task_loss": 0.3675590455532074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43471309542655945, "epoch": 0.92, "learning_rate": 4.3424707160970904e-05, "loss": 0.3131, "step": 1089, "task_loss": 0.3831470012664795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31848570704460144, "epoch": 0.92, "learning_rate": 4.3418669242845064e-05, "loss": 0.2812, "step": 1090, "task_loss": 0.7664391398429871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21165594458580017, "epoch": 0.92, "learning_rate": 4.341263132471924e-05, "loss": 0.4194, "step": 1091, "task_loss": 1.3853480815887451 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.252130925655365, "epoch": 0.92, "learning_rate": 4.340659340659341e-05, "loss": 0.2741, "step": 1092, "task_loss": 0.5076930522918701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3264647126197815, "epoch": 0.92, "learning_rate": 4.340055548846757e-05, "loss": 0.3794, "step": 1093, "task_loss": 0.6393725872039795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7912085056304932, "epoch": 0.92, "learning_rate": 4.339451757034175e-05, "loss": 0.4379, "step": 1094, "task_loss": 0.5916897058486938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.255278617143631, "epoch": 0.93, "learning_rate": 4.338847965221592e-05, "loss": 0.2737, "step": 1095, "task_loss": 0.42121970653533936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22292080521583557, "epoch": 0.93, "learning_rate": 4.338244173409009e-05, "loss": 0.3096, "step": 1096, "task_loss": 0.20208965241909027 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3654591739177704, "epoch": 0.93, "learning_rate": 4.3376403815964255e-05, "loss": 0.3714, "step": 1097, "task_loss": 0.7160986065864563 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3318014144897461, "epoch": 0.93, "learning_rate": 4.337036589783843e-05, "loss": 0.3555, "step": 1098, "task_loss": 0.5789957046508789 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4586128294467926, "epoch": 0.93, "learning_rate": 4.3364327979712596e-05, "loss": 0.2875, "step": 1099, "task_loss": 0.9421383738517761 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29096707701683044, "epoch": 0.93, "learning_rate": 4.335829006158676e-05, "loss": 0.2564, "step": 1100, "task_loss": 0.45079097151756287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1400458663702011, "epoch": 0.93, "learning_rate": 4.335225214346094e-05, "loss": 0.3466, "step": 1101, "task_loss": 0.3176582157611847 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2180832177400589, "epoch": 0.93, "learning_rate": 4.3346214225335104e-05, "loss": 0.2295, "step": 1102, "task_loss": 0.552739679813385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1520441472530365, "epoch": 0.93, "learning_rate": 4.334017630720927e-05, "loss": 0.2016, "step": 1103, "task_loss": 0.03420122712850571 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3437575101852417, "epoch": 0.93, "learning_rate": 4.3334138389083446e-05, "loss": 0.2947, "step": 1104, "task_loss": 0.7105420827865601 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2594948410987854, "epoch": 0.93, "learning_rate": 4.332810047095762e-05, "loss": 0.2823, "step": 1105, "task_loss": 1.042885422706604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43233928084373474, "epoch": 0.93, "learning_rate": 4.332206255283179e-05, "loss": 0.3581, "step": 1106, "task_loss": 0.7754158973693848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2751336693763733, "epoch": 0.94, "learning_rate": 4.3316024634705954e-05, "loss": 0.3445, "step": 1107, "task_loss": 0.1947813481092453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4263837933540344, "epoch": 0.94, "learning_rate": 4.330998671658013e-05, "loss": 0.2783, "step": 1108, "task_loss": 0.5212488174438477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29105913639068604, "epoch": 0.94, "learning_rate": 4.3303948798454295e-05, "loss": 0.3763, "step": 1109, "task_loss": 0.32536041736602783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2653868496417999, "epoch": 0.94, "learning_rate": 4.329791088032846e-05, "loss": 0.268, "step": 1110, "task_loss": 0.4643128514289856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3989350199699402, "epoch": 0.94, "learning_rate": 4.3291872962202636e-05, "loss": 0.3036, "step": 1111, "task_loss": 0.4990047216415405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3304842412471771, "epoch": 0.94, "learning_rate": 4.32858350440768e-05, "loss": 0.31, "step": 1112, "task_loss": 0.7792151570320129 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2976856529712677, "epoch": 0.94, "learning_rate": 4.327979712595097e-05, "loss": 0.4303, "step": 1113, "task_loss": 0.3858775794506073 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30325037240982056, "epoch": 0.94, "learning_rate": 4.3273759207825144e-05, "loss": 0.3616, "step": 1114, "task_loss": 0.36662939190864563 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1939043253660202, "epoch": 0.94, "learning_rate": 4.326772128969931e-05, "loss": 0.3242, "step": 1115, "task_loss": 0.08099433779716492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1981017291545868, "epoch": 0.94, "learning_rate": 4.3261683371573486e-05, "loss": 0.2896, "step": 1116, "task_loss": 0.3882223963737488 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20215186476707458, "epoch": 0.94, "learning_rate": 4.325564545344765e-05, "loss": 0.3411, "step": 1117, "task_loss": 0.4387549161911011 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32144787907600403, "epoch": 0.94, "learning_rate": 4.324960753532182e-05, "loss": 0.2998, "step": 1118, "task_loss": 0.25610217452049255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14104512333869934, "epoch": 0.95, "learning_rate": 4.3243569617195994e-05, "loss": 0.2548, "step": 1119, "task_loss": 0.3373267650604248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1450096070766449, "epoch": 0.95, "learning_rate": 4.323753169907016e-05, "loss": 0.2231, "step": 1120, "task_loss": 0.07284929603338242 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19169721007347107, "epoch": 0.95, "learning_rate": 4.3231493780944335e-05, "loss": 0.2622, "step": 1121, "task_loss": 1.0857431888580322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15899260342121124, "epoch": 0.95, "learning_rate": 4.32254558628185e-05, "loss": 0.257, "step": 1122, "task_loss": 0.30492138862609863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4094792306423187, "epoch": 0.95, "learning_rate": 4.321941794469267e-05, "loss": 0.3212, "step": 1123, "task_loss": 0.5082416534423828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23796261847019196, "epoch": 0.95, "learning_rate": 4.3213380026566843e-05, "loss": 0.2652, "step": 1124, "task_loss": 0.7401384115219116 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3221428394317627, "epoch": 0.95, "learning_rate": 4.320734210844101e-05, "loss": 0.3812, "step": 1125, "task_loss": 0.4403117001056671 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20508703589439392, "epoch": 0.95, "learning_rate": 4.3201304190315185e-05, "loss": 0.3789, "step": 1126, "task_loss": 0.6447376608848572 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33341288566589355, "epoch": 0.95, "learning_rate": 4.319526627218935e-05, "loss": 0.3442, "step": 1127, "task_loss": 1.5649112462997437 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21871832013130188, "epoch": 0.95, "learning_rate": 4.318922835406352e-05, "loss": 0.3433, "step": 1128, "task_loss": 1.027098536491394 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4082304835319519, "epoch": 0.95, "learning_rate": 4.318319043593769e-05, "loss": 0.3242, "step": 1129, "task_loss": 1.0208954811096191 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28492027521133423, "epoch": 0.95, "learning_rate": 4.317715251781186e-05, "loss": 0.3477, "step": 1130, "task_loss": 0.804124653339386 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14530184864997864, "epoch": 0.96, "learning_rate": 4.317111459968603e-05, "loss": 0.2882, "step": 1131, "task_loss": 0.0909724086523056 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44638943672180176, "epoch": 0.96, "learning_rate": 4.31650766815602e-05, "loss": 0.3674, "step": 1132, "task_loss": 0.0451495423913002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19970831274986267, "epoch": 0.96, "learning_rate": 4.315903876343437e-05, "loss": 0.3431, "step": 1133, "task_loss": 0.6185499429702759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.327328085899353, "epoch": 0.96, "learning_rate": 4.3153000845308536e-05, "loss": 0.336, "step": 1134, "task_loss": 0.2444780319929123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36800825595855713, "epoch": 0.96, "learning_rate": 4.314696292718271e-05, "loss": 0.4344, "step": 1135, "task_loss": 1.4250314235687256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41619518399238586, "epoch": 0.96, "learning_rate": 4.3140925009056883e-05, "loss": 0.3322, "step": 1136, "task_loss": 0.871273934841156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3113219738006592, "epoch": 0.96, "learning_rate": 4.313488709093105e-05, "loss": 0.355, "step": 1137, "task_loss": 1.1574018001556396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4029061198234558, "epoch": 0.96, "learning_rate": 4.312884917280522e-05, "loss": 0.272, "step": 1138, "task_loss": 0.5418663620948792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24480357766151428, "epoch": 0.96, "learning_rate": 4.312281125467939e-05, "loss": 0.3406, "step": 1139, "task_loss": 1.1182581186294556 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5392962694168091, "epoch": 0.96, "learning_rate": 4.311677333655356e-05, "loss": 0.3599, "step": 1140, "task_loss": 0.26745596528053284 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5793110728263855, "epoch": 0.96, "learning_rate": 4.3110735418427726e-05, "loss": 0.3818, "step": 1141, "task_loss": 0.7457956075668335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35666385293006897, "epoch": 0.96, "learning_rate": 4.31046975003019e-05, "loss": 0.3731, "step": 1142, "task_loss": 0.9595359563827515 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2093726098537445, "epoch": 0.97, "learning_rate": 4.309865958217607e-05, "loss": 0.3168, "step": 1143, "task_loss": 0.514734148979187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17906400561332703, "epoch": 0.97, "learning_rate": 4.3092621664050235e-05, "loss": 0.253, "step": 1144, "task_loss": 0.3397381901741028 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24244970083236694, "epoch": 0.97, "learning_rate": 4.308658374592441e-05, "loss": 0.2957, "step": 1145, "task_loss": 0.3226032555103302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23102456331253052, "epoch": 0.97, "learning_rate": 4.308054582779858e-05, "loss": 0.33, "step": 1146, "task_loss": 1.457642912864685 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2346304953098297, "epoch": 0.97, "learning_rate": 4.307450790967274e-05, "loss": 0.248, "step": 1147, "task_loss": 0.3081687390804291 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2174096405506134, "epoch": 0.97, "learning_rate": 4.306846999154692e-05, "loss": 0.2975, "step": 1148, "task_loss": 0.4851154386997223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42781779170036316, "epoch": 0.97, "learning_rate": 4.306243207342109e-05, "loss": 0.2991, "step": 1149, "task_loss": 1.0175594091415405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5341086387634277, "epoch": 0.97, "learning_rate": 4.305639415529525e-05, "loss": 0.3595, "step": 1150, "task_loss": 1.0358262062072754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20490550994873047, "epoch": 0.97, "learning_rate": 4.3050356237169425e-05, "loss": 0.2406, "step": 1151, "task_loss": 0.7428348660469055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3338344693183899, "epoch": 0.97, "learning_rate": 4.30443183190436e-05, "loss": 0.2939, "step": 1152, "task_loss": 0.1914425641298294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16454161703586578, "epoch": 0.97, "learning_rate": 4.3038280400917766e-05, "loss": 0.308, "step": 1153, "task_loss": 0.3811272084712982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.10998144745826721, "epoch": 0.97, "learning_rate": 4.3032242482791933e-05, "loss": 0.3707, "step": 1154, "task_loss": 0.5648936629295349 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27075132727622986, "epoch": 0.98, "learning_rate": 4.302620456466611e-05, "loss": 0.3311, "step": 1155, "task_loss": 0.9098003506660461 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17398475110530853, "epoch": 0.98, "learning_rate": 4.3020166646540275e-05, "loss": 0.2988, "step": 1156, "task_loss": 0.14760294556617737 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25438186526298523, "epoch": 0.98, "learning_rate": 4.301412872841444e-05, "loss": 0.2893, "step": 1157, "task_loss": 0.47098496556282043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33567434549331665, "epoch": 0.98, "learning_rate": 4.3008090810288616e-05, "loss": 0.2621, "step": 1158, "task_loss": 0.30013203620910645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16074144840240479, "epoch": 0.98, "learning_rate": 4.300205289216279e-05, "loss": 0.3318, "step": 1159, "task_loss": 0.380643367767334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25731587409973145, "epoch": 0.98, "learning_rate": 4.299601497403695e-05, "loss": 0.2702, "step": 1160, "task_loss": 0.8126835227012634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17064577341079712, "epoch": 0.98, "learning_rate": 4.2989977055911124e-05, "loss": 0.2624, "step": 1161, "task_loss": 0.19275051355361938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21192097663879395, "epoch": 0.98, "learning_rate": 4.29839391377853e-05, "loss": 0.3034, "step": 1162, "task_loss": 0.6967246532440186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43361207842826843, "epoch": 0.98, "learning_rate": 4.297790121965946e-05, "loss": 0.376, "step": 1163, "task_loss": 1.2016937732696533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2696865200996399, "epoch": 0.98, "learning_rate": 4.297186330153363e-05, "loss": 0.2939, "step": 1164, "task_loss": 0.8029722571372986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3726910352706909, "epoch": 0.98, "learning_rate": 4.2965825383407806e-05, "loss": 0.2944, "step": 1165, "task_loss": 0.4432227611541748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28014546632766724, "epoch": 0.99, "learning_rate": 4.2959787465281974e-05, "loss": 0.3208, "step": 1166, "task_loss": 0.6120973229408264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28813737630844116, "epoch": 0.99, "learning_rate": 4.295374954715614e-05, "loss": 0.2437, "step": 1167, "task_loss": 0.21252965927124023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30225542187690735, "epoch": 0.99, "learning_rate": 4.2947711629030315e-05, "loss": 0.2979, "step": 1168, "task_loss": 1.1310405731201172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4308841824531555, "epoch": 0.99, "learning_rate": 4.294167371090448e-05, "loss": 0.3258, "step": 1169, "task_loss": 0.7272492051124573 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2597977817058563, "epoch": 0.99, "learning_rate": 4.293563579277865e-05, "loss": 0.2245, "step": 1170, "task_loss": 0.2009928822517395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22145745158195496, "epoch": 0.99, "learning_rate": 4.292959787465282e-05, "loss": 0.2256, "step": 1171, "task_loss": 0.14712683856487274 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3405866026878357, "epoch": 0.99, "learning_rate": 4.292355995652699e-05, "loss": 0.301, "step": 1172, "task_loss": 1.0793962478637695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19213813543319702, "epoch": 0.99, "learning_rate": 4.291752203840116e-05, "loss": 0.2603, "step": 1173, "task_loss": 0.22636237740516663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3139817714691162, "epoch": 0.99, "learning_rate": 4.291148412027533e-05, "loss": 0.3562, "step": 1174, "task_loss": 0.49623751640319824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26644977927207947, "epoch": 0.99, "learning_rate": 4.29054462021495e-05, "loss": 0.3119, "step": 1175, "task_loss": 1.0742558240890503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.646160900592804, "epoch": 0.99, "learning_rate": 4.289940828402367e-05, "loss": 0.4062, "step": 1176, "task_loss": 0.5107670426368713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1254703253507614, "epoch": 0.99, "learning_rate": 4.289337036589784e-05, "loss": 0.1847, "step": 1177, "task_loss": 0.46790412068367004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22822053730487823, "epoch": 1.0, "learning_rate": 4.2887332447772014e-05, "loss": 0.332, "step": 1178, "task_loss": 0.5761216282844543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2590216100215912, "epoch": 1.0, "learning_rate": 4.288129452964618e-05, "loss": 0.3015, "step": 1179, "task_loss": 1.237515926361084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.12651792168617249, "epoch": 1.0, "learning_rate": 4.287525661152035e-05, "loss": 0.2691, "step": 1180, "task_loss": 0.32989341020584106 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30137306451797485, "epoch": 1.0, "learning_rate": 4.286921869339452e-05, "loss": 0.2854, "step": 1181, "task_loss": 1.315637230873108 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20005843043327332, "epoch": 1.0, "learning_rate": 4.286318077526869e-05, "loss": 0.2833, "step": 1182, "task_loss": 0.16443997621536255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20300468802452087, "epoch": 1.0, "learning_rate": 4.2857142857142856e-05, "loss": 0.2896, "step": 1183, "task_loss": 0.29906517267227173 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -0.004582871682941914, "compression/movement_sparsity/linear_layer_sparsity": 0.0010030490573611607, "compression/movement_sparsity/model_sparsity": 0.0009685912365009771, "compression_loss": 0.0, "distillation_loss": 0.2979019284248352, "epoch": 1.0, "learning_rate": 4.285110493901703e-05, "loss": 0.566, "step": 1184, "task_loss": 0.7764566540718079 }, { "compression/movement_sparsity/importance_regularization_factor": 0.002531644366179364, "compression/movement_sparsity/importance_threshold": -0.0045712694816648705, "compression/movement_sparsity/linear_layer_sparsity": 0.0010259553833896536, "compression/movement_sparsity/model_sparsity": 0.00099071065976229, "compression_loss": 0.2735269069671631, "distillation_loss": 0.2972022294998169, "epoch": 1.0, "learning_rate": 4.28450670208912e-05, "loss": 0.5837, "step": 1185, "task_loss": 0.9385803937911987 }, { "compression/movement_sparsity/importance_regularization_factor": 0.005059012307081834, "compression/movement_sparsity/importance_threshold": -0.0045596868786961335, "compression/movement_sparsity/linear_layer_sparsity": 0.0010290198944720704, "compression/movement_sparsity/model_sparsity": 0.000993669895461487, "compression_loss": 0.5465914607048035, "distillation_loss": 0.27455708384513855, "epoch": 1.0, "learning_rate": 4.283902910276537e-05, "loss": 0.8638, "step": 1186, "task_loss": 0.4351390302181244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.007582107437607011, "compression/movement_sparsity/importance_threshold": -0.004548123857469081, "compression/movement_sparsity/linear_layer_sparsity": 0.0010320724813868514, "compression/movement_sparsity/model_sparsity": 0.0009966176166248894, "compression_loss": 0.8191942572593689, "distillation_loss": 0.608700156211853, "epoch": 1.0, "learning_rate": 4.283299118463954e-05, "loss": 1.1873, "step": 1187, "task_loss": 0.7812014818191528 }, { "compression/movement_sparsity/importance_regularization_factor": 0.010100933372653165, "compression/movement_sparsity/importance_threshold": -0.0045365804014170985, "compression/movement_sparsity/linear_layer_sparsity": 0.0010366513617590227, "compression/movement_sparsity/model_sparsity": 0.0010010391983699932, "compression_loss": 1.0913352966308594, "distillation_loss": 0.5495626926422119, "epoch": 1.0, "learning_rate": 4.2826953266513706e-05, "loss": 1.5247, "step": 1188, "task_loss": 0.5898081660270691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.012615493727119342, "compression/movement_sparsity/importance_threshold": -0.004525056493973567, "compression/movement_sparsity/linear_layer_sparsity": 0.0010824520896483725, "compression/movement_sparsity/model_sparsity": 0.0010452665303568242, "compression_loss": 1.363014578819275, "distillation_loss": 0.2113880217075348, "epoch": 1.01, "learning_rate": 4.282091534838788e-05, "loss": 1.6571, "step": 1189, "task_loss": 0.3118751049041748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.015125792115903702, "compression/movement_sparsity/importance_threshold": -0.0045135521185718725, "compression/movement_sparsity/linear_layer_sparsity": 0.0011131687454783556, "compression/movement_sparsity/model_sparsity": 0.0010749279745635614, "compression_loss": 1.6342339515686035, "distillation_loss": 0.4433828294277191, "epoch": 1.01, "learning_rate": 4.281487743026205e-05, "loss": 1.974, "step": 1190, "task_loss": 0.20505626499652863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.017631832153905957, "compression/movement_sparsity/importance_threshold": -0.0045020672586453935, "compression/movement_sparsity/linear_layer_sparsity": 0.0011223265062226984, "compression/movement_sparsity/model_sparsity": 0.0010837711380537688, "compression_loss": 1.9049921035766602, "distillation_loss": 0.29366523027420044, "epoch": 1.01, "learning_rate": 4.2808839512136214e-05, "loss": 2.1117, "step": 1191, "task_loss": 0.4957435429096222 }, { "compression/movement_sparsity/importance_regularization_factor": 0.02013361745602449, "compression/movement_sparsity/importance_threshold": -0.004490601897627514, "compression/movement_sparsity/linear_layer_sparsity": 0.0011589575492000695, "compression/movement_sparsity/model_sparsity": 0.001119143792014598, "compression_loss": 2.175288200378418, "distillation_loss": 0.18494950234889984, "epoch": 1.01, "learning_rate": 4.280280159401039e-05, "loss": 2.4686, "step": 1192, "task_loss": 0.30005085468292236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.022631151637157676, "compression/movement_sparsity/importance_threshold": -0.0044791560189516196, "compression/movement_sparsity/linear_layer_sparsity": 0.0011681391582796842, "compression/movement_sparsity/model_sparsity": 0.0011280099845763945, "compression_loss": 2.4451239109039307, "distillation_loss": 0.2844278812408447, "epoch": 1.01, "learning_rate": 4.2796763675884555e-05, "loss": 2.7955, "step": 1193, "task_loss": 0.7436234354972839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.025124438312205122, "compression/movement_sparsity/importance_threshold": -0.004467729606051088, "compression/movement_sparsity/linear_layer_sparsity": 0.001171191745194465, "compression/movement_sparsity/model_sparsity": 0.0011309577057397968, "compression_loss": 2.7144975662231445, "distillation_loss": 0.31338486075401306, "epoch": 1.01, "learning_rate": 4.279072575775873e-05, "loss": 3.1256, "step": 1194, "task_loss": 1.1345257759094238 }, { "compression/movement_sparsity/importance_regularization_factor": 0.027613481096065207, "compression/movement_sparsity/importance_threshold": -0.004456322642359304, "compression/movement_sparsity/linear_layer_sparsity": 0.0012062964947144458, "compression/movement_sparsity/model_sparsity": 0.001164856499118925, "compression_loss": 2.9834110736846924, "distillation_loss": 0.15745842456817627, "epoch": 1.01, "learning_rate": 4.2784687839632896e-05, "loss": 3.3426, "step": 1195, "task_loss": 0.06557552516460419 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03009828360363631, "compression/movement_sparsity/importance_threshold": -0.004444935111309654, "compression/movement_sparsity/linear_layer_sparsity": 0.0012246120162031314, "compression/movement_sparsity/model_sparsity": 0.0011825428260993397, "compression_loss": 3.2518651485443115, "distillation_loss": 0.18217170238494873, "epoch": 1.01, "learning_rate": 4.277864992150707e-05, "loss": 3.6917, "step": 1196, "task_loss": 0.37872955203056335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03257884944981815, "compression/movement_sparsity/importance_threshold": -0.004433566996335514, "compression/movement_sparsity/linear_layer_sparsity": 0.0012641167835807455, "compression/movement_sparsity/model_sparsity": 0.0012206904831866532, "compression_loss": 3.519860029220581, "distillation_loss": 0.39569979906082153, "epoch": 1.01, "learning_rate": 4.277261200338124e-05, "loss": 3.8143, "step": 1197, "task_loss": 0.4707227945327759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03505518224950899, "compression/movement_sparsity/importance_threshold": -0.004422218280870271, "compression/movement_sparsity/linear_layer_sparsity": 0.001325359308558538, "compression/movement_sparsity/model_sparsity": 0.001279829139027415, "compression_loss": 3.7873928546905518, "distillation_loss": 0.1788415014743805, "epoch": 1.01, "learning_rate": 4.2766574085255405e-05, "loss": 4.0707, "step": 1198, "task_loss": 0.6244291663169861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03752728561760765, "compression/movement_sparsity/importance_threshold": -0.004410888948347306, "compression/movement_sparsity/linear_layer_sparsity": 0.0013497919280444212, "compression/movement_sparsity/model_sparsity": 0.0013034224228704288, "compression_loss": 4.0544657707214355, "distillation_loss": 0.21916665136814117, "epoch": 1.01, "learning_rate": 4.276053616712958e-05, "loss": 4.3294, "step": 1199, "task_loss": 0.9281142354011536 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03999516316901264, "compression/movement_sparsity/importance_threshold": -0.004399578982200004, "compression/movement_sparsity/linear_layer_sparsity": 0.001400743896352464, "compression/movement_sparsity/model_sparsity": 0.0013526240343205017, "compression_loss": 4.321077823638916, "distillation_loss": 0.42287951707839966, "epoch": 1.01, "learning_rate": 4.2754498249003746e-05, "loss": 4.7223, "step": 1200, "task_loss": 0.5141656398773193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04245881851862332, "compression/movement_sparsity/importance_threshold": -0.004388288365861745, "compression/movement_sparsity/linear_layer_sparsity": 0.0014059070609387925, "compression/movement_sparsity/model_sparsity": 0.0013576098283195378, "compression_loss": 4.587231636047363, "distillation_loss": 0.238483265042305, "epoch": 1.02, "learning_rate": 4.274846033087791e-05, "loss": 4.9283, "step": 1201, "task_loss": 0.3175157308578491 }, { "compression/movement_sparsity/importance_regularization_factor": 0.044918255281338304, "compression/movement_sparsity/importance_threshold": -0.004377017082765913, "compression/movement_sparsity/linear_layer_sparsity": 0.0014247949424739995, "compression/movement_sparsity/model_sparsity": 0.0013758488530180905, "compression_loss": 4.852923393249512, "distillation_loss": 0.30781543254852295, "epoch": 1.02, "learning_rate": 4.274242241275209e-05, "loss": 5.2274, "step": 1202, "task_loss": 0.5567305088043213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04737347707205597, "compression/movement_sparsity/importance_threshold": -0.00436576511634589, "compression/movement_sparsity/linear_layer_sparsity": 0.0014452329658018689, "compression/movement_sparsity/model_sparsity": 0.0013955847673699335, "compression_loss": 5.118154525756836, "distillation_loss": 0.27963826060295105, "epoch": 1.02, "learning_rate": 4.2736384494626254e-05, "loss": 5.4936, "step": 1203, "task_loss": 0.8914282321929932 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04982448750567581, "compression/movement_sparsity/importance_threshold": -0.004354532450035059, "compression/movement_sparsity/linear_layer_sparsity": 0.0014616525446364522, "compression/movement_sparsity/model_sparsity": 0.0014114402831590161, "compression_loss": 5.382930278778076, "distillation_loss": 0.3676857352256775, "epoch": 1.02, "learning_rate": 4.273034657650042e-05, "loss": 5.805, "step": 1204, "task_loss": 0.9873665571212769 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05227129019709631, "compression/movement_sparsity/importance_threshold": -0.0043433190672668015, "compression/movement_sparsity/linear_layer_sparsity": 0.001534342270544673, "compression/movement_sparsity/model_sparsity": 0.0014816328933625368, "compression_loss": 5.647240161895752, "distillation_loss": 0.2560694217681885, "epoch": 1.02, "learning_rate": 4.2724308658374595e-05, "loss": 5.9633, "step": 1205, "task_loss": 0.2549925148487091 }, { "compression/movement_sparsity/importance_regularization_factor": 0.054713888761215856, "compression/movement_sparsity/importance_threshold": -0.004332124951474504, "compression/movement_sparsity/linear_layer_sparsity": 0.0016127913694210158, "compression/movement_sparsity/model_sparsity": 0.0015573870243548206, "compression_loss": 5.911093711853027, "distillation_loss": 0.4659309387207031, "epoch": 1.02, "learning_rate": 4.271827074024877e-05, "loss": 6.3211, "step": 1206, "task_loss": 0.4940612316131592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05715228681293394, "compression/movement_sparsity/importance_threshold": -0.004320950086091544, "compression/movement_sparsity/linear_layer_sparsity": 0.0016541920794527322, "compression/movement_sparsity/model_sparsity": 0.0015973654926334663, "compression_loss": 6.174484729766846, "distillation_loss": 0.3278801441192627, "epoch": 1.02, "learning_rate": 4.271223282212293e-05, "loss": 6.4747, "step": 1207, "task_loss": 0.5654054880142212 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05958648796714905, "compression/movement_sparsity/importance_threshold": -0.004309794454551307, "compression/movement_sparsity/linear_layer_sparsity": 0.001720013484802696, "compression/movement_sparsity/model_sparsity": 0.0016609257302193316, "compression_loss": 6.437419891357422, "distillation_loss": 0.29293307662010193, "epoch": 1.02, "learning_rate": 4.2706194903997104e-05, "loss": 6.7612, "step": 1208, "task_loss": 1.6986949443817139 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06201649583876001, "compression/movement_sparsity/importance_threshold": -0.004298658040287175, "compression/movement_sparsity/linear_layer_sparsity": 0.001854327309053057, "compression/movement_sparsity/model_sparsity": 0.0017906254614090391, "compression_loss": 6.69988489151001, "distillation_loss": 0.2984928488731384, "epoch": 1.02, "learning_rate": 4.270015698587128e-05, "loss": 7.0283, "step": 1209, "task_loss": 0.8940395712852478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0644423140426652, "compression/movement_sparsity/importance_threshold": -0.004287540826732533, "compression/movement_sparsity/linear_layer_sparsity": 0.001954871890558654, "compression/movement_sparsity/model_sparsity": 0.001887716027228607, "compression_loss": 6.961897850036621, "distillation_loss": 0.354250967502594, "epoch": 1.02, "learning_rate": 4.2694119067745445e-05, "loss": 7.3816, "step": 1210, "task_loss": 1.006991982460022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06686394619376412, "compression/movement_sparsity/importance_threshold": -0.00427644279732076, "compression/movement_sparsity/linear_layer_sparsity": 0.002064013796929708, "compression/movement_sparsity/model_sparsity": 0.001993108573356039, "compression_loss": 7.223446369171143, "distillation_loss": 0.2887153625488281, "epoch": 1.02, "learning_rate": 4.268808114961961e-05, "loss": 7.6016, "step": 1211, "task_loss": 0.9796630144119263 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06928139590695526, "compression/movement_sparsity/importance_threshold": -0.004265363935485241, "compression/movement_sparsity/linear_layer_sparsity": 0.0021968251760579502, "compression/movement_sparsity/model_sparsity": 0.0021213574730356345, "compression_loss": 7.4845356941223145, "distillation_loss": 0.4588213264942169, "epoch": 1.02, "learning_rate": 4.2682043231493786e-05, "loss": 7.8386, "step": 1212, "task_loss": 0.8736432790756226 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0716946667971371, "compression/movement_sparsity/importance_threshold": -0.004254304224659358, "compression/movement_sparsity/linear_layer_sparsity": 0.0023422046278743923, "compression/movement_sparsity/model_sparsity": 0.002261742693442676, "compression_loss": 7.745163440704346, "distillation_loss": 0.1826983094215393, "epoch": 1.03, "learning_rate": 4.267600531336795e-05, "loss": 7.9382, "step": 1213, "task_loss": 0.19959872961044312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07410376247920902, "compression/movement_sparsity/importance_threshold": -0.004243263648276493, "compression/movement_sparsity/linear_layer_sparsity": 0.002462591024326065, "compression/movement_sparsity/model_sparsity": 0.0023779934468243598, "compression_loss": 8.005327224731445, "distillation_loss": 0.3748864531517029, "epoch": 1.03, "learning_rate": 4.266996739524212e-05, "loss": 8.4308, "step": 1214, "task_loss": 0.4523988366127014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07650868656806953, "compression/movement_sparsity/importance_threshold": -0.004232242189770029, "compression/movement_sparsity/linear_layer_sparsity": 0.002560297653934327, "compression/movement_sparsity/model_sparsity": 0.002472343553124827, "compression_loss": 8.265039443969727, "distillation_loss": 0.1944810152053833, "epoch": 1.03, "learning_rate": 4.2663929477116294e-05, "loss": 8.6001, "step": 1215, "task_loss": 0.9567658305168152 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0789094426786171, "compression/movement_sparsity/importance_threshold": -0.004221239832573351, "compression/movement_sparsity/linear_layer_sparsity": 0.002660460662075576, "compression/movement_sparsity/model_sparsity": 0.0025690656537989695, "compression_loss": 8.524290084838867, "distillation_loss": 0.2815321981906891, "epoch": 1.03, "learning_rate": 4.265789155899047e-05, "loss": 8.9282, "step": 1216, "task_loss": 0.39497560262680054 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08130603442575124, "compression/movement_sparsity/importance_threshold": -0.0042102565601198376, "compression/movement_sparsity/linear_layer_sparsity": 0.0028865548046191746, "compression/movement_sparsity/model_sparsity": 0.0027873927669992576, "compression_loss": 8.783085823059082, "distillation_loss": 0.34010598063468933, "epoch": 1.03, "learning_rate": 4.265185364086463e-05, "loss": 9.0887, "step": 1217, "task_loss": 0.5490818023681641 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0836984654243702, "compression/movement_sparsity/importance_threshold": -0.0041992923558428746, "compression/movement_sparsity/linear_layer_sparsity": 0.0030605641829293236, "compression/movement_sparsity/model_sparsity": 0.0029554243878489915, "compression_loss": 9.041414260864258, "distillation_loss": 0.2901913821697235, "epoch": 1.03, "learning_rate": 4.26458157227388e-05, "loss": 9.3702, "step": 1218, "task_loss": 0.7763687968254089 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08608673928937305, "compression/movement_sparsity/importance_threshold": -0.004188347203175843, "compression/movement_sparsity/linear_layer_sparsity": 0.0032257735255242057, "compression/movement_sparsity/model_sparsity": 0.003114958281282354, "compression_loss": 9.299286842346191, "distillation_loss": 0.2729113698005676, "epoch": 1.03, "learning_rate": 4.2639777804612977e-05, "loss": 9.5571, "step": 1219, "task_loss": 0.4134138524532318 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08847085963565826, "compression/movement_sparsity/importance_threshold": -0.004177421085552127, "compression/movement_sparsity/linear_layer_sparsity": 0.003535253372345394, "compression/movement_sparsity/model_sparsity": 0.0034138065432938658, "compression_loss": 9.5566987991333, "distillation_loss": 0.1201941967010498, "epoch": 1.03, "learning_rate": 4.263373988648714e-05, "loss": 9.8455, "step": 1220, "task_loss": 0.5872335433959961 }, { "compression/movement_sparsity/importance_regularization_factor": 0.090850830078125, "compression/movement_sparsity/importance_threshold": -0.004166513986405107, "compression/movement_sparsity/linear_layer_sparsity": 0.003907120464237418, "compression/movement_sparsity/model_sparsity": 0.003772898856582415, "compression_loss": 9.813648223876953, "distillation_loss": 0.30745095014572144, "epoch": 1.03, "learning_rate": 4.262770196836131e-05, "loss": 10.0886, "step": 1221, "task_loss": 0.9734212756156921 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09322665423167198, "compression/movement_sparsity/importance_threshold": -0.0041556258891681675, "compression/movement_sparsity/linear_layer_sparsity": 0.0042949897890967705, "compression/movement_sparsity/model_sparsity": 0.004147443676907238, "compression_loss": 10.070133209228516, "distillation_loss": 0.2524697184562683, "epoch": 1.03, "learning_rate": 4.2621664050235485e-05, "loss": 10.418, "step": 1222, "task_loss": 0.5039573907852173 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09559833571119758, "compression/movement_sparsity/importance_threshold": -0.004144756777274692, "compression/movement_sparsity/linear_layer_sparsity": 0.0046193152246246085, "compression/movement_sparsity/model_sparsity": 0.004460627535982953, "compression_loss": 10.326163291931152, "distillation_loss": 0.3355924189090729, "epoch": 1.03, "learning_rate": 4.2615626132109645e-05, "loss": 10.7145, "step": 1223, "task_loss": 0.10737408697605133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09796587813160129, "compression/movement_sparsity/importance_threshold": -0.00413390663415806, "compression/movement_sparsity/linear_layer_sparsity": 0.0049200069598981655, "compression/movement_sparsity/model_sparsity": 0.0047509895851138885, "compression_loss": 10.581745147705078, "distillation_loss": 0.4191606342792511, "epoch": 1.03, "learning_rate": 4.260958821398382e-05, "loss": 10.9797, "step": 1224, "task_loss": 1.102081060409546 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1003292851077815, "compression/movement_sparsity/importance_threshold": -0.004123075443251656, "compression/movement_sparsity/linear_layer_sparsity": 0.005208667210027137, "compression/movement_sparsity/model_sparsity": 0.005029733467628132, "compression_loss": 10.836868286132812, "distillation_loss": 0.319335401058197, "epoch": 1.04, "learning_rate": 4.260355029585799e-05, "loss": 11.1461, "step": 1225, "task_loss": 0.7531952261924744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10268856025463713, "compression/movement_sparsity/importance_threshold": -0.004112263187988863, "compression/movement_sparsity/linear_layer_sparsity": 0.005549817646089178, "compression/movement_sparsity/model_sparsity": 0.005359164336709944, "compression_loss": 11.091536521911621, "distillation_loss": 0.2719825208187103, "epoch": 1.04, "learning_rate": 4.259751237773216e-05, "loss": 11.492, "step": 1226, "task_loss": 0.2890942096710205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10504370718706657, "compression/movement_sparsity/importance_threshold": -0.004101469851803064, "compression/movement_sparsity/linear_layer_sparsity": 0.005901246714653333, "compression/movement_sparsity/model_sparsity": 0.005698520735646651, "compression_loss": 11.345746040344238, "distillation_loss": 0.20737068355083466, "epoch": 1.04, "learning_rate": 4.259147445960633e-05, "loss": 11.6564, "step": 1227, "task_loss": 0.5680353045463562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1073947295199692, "compression/movement_sparsity/importance_threshold": -0.004090695418127641, "compression/movement_sparsity/linear_layer_sparsity": 0.0063402468703352656, "compression/movement_sparsity/model_sparsity": 0.006122439885458465, "compression_loss": 11.599504470825195, "distillation_loss": 0.36999279260635376, "epoch": 1.04, "learning_rate": 4.25854365414805e-05, "loss": 11.9319, "step": 1228, "task_loss": 0.9201506972312927 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10974163086824362, "compression/movement_sparsity/importance_threshold": -0.004079939870395975, "compression/movement_sparsity/linear_layer_sparsity": 0.006865303743845221, "compression/movement_sparsity/model_sparsity": 0.006629459440099479, "compression_loss": 11.852808952331543, "distillation_loss": 0.21453885734081268, "epoch": 1.04, "learning_rate": 4.257939862335467e-05, "loss": 12.1649, "step": 1229, "task_loss": 0.2664773762226105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11208441484678833, "compression/movement_sparsity/importance_threshold": -0.0040692031920414535, "compression/movement_sparsity/linear_layer_sparsity": 0.007315381451260871, "compression/movement_sparsity/model_sparsity": 0.007064075593664422, "compression_loss": 12.10567855834961, "distillation_loss": 0.409354567527771, "epoch": 1.04, "learning_rate": 4.2573360705228836e-05, "loss": 12.4557, "step": 1230, "task_loss": 0.20989564061164856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1144230850705027, "compression/movement_sparsity/importance_threshold": -0.004058485366497453, "compression/movement_sparsity/linear_layer_sparsity": 0.007931240861317924, "compression/movement_sparsity/model_sparsity": 0.007658778338380865, "compression_loss": 12.358091354370117, "distillation_loss": 0.3727341294288635, "epoch": 1.04, "learning_rate": 4.256732278710301e-05, "loss": 12.7365, "step": 1231, "task_loss": 0.2702047526836395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1167576451542851, "compression/movement_sparsity/importance_threshold": -0.00404778637719736, "compression/movement_sparsity/linear_layer_sparsity": 0.008521367917616785, "compression/movement_sparsity/model_sparsity": 0.008228632714852689, "compression_loss": 12.610051155090332, "distillation_loss": 0.33078864216804504, "epoch": 1.04, "learning_rate": 4.256128486897718e-05, "loss": 12.9669, "step": 1232, "task_loss": 1.3284165859222412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11908809871303416, "compression/movement_sparsity/importance_threshold": -0.004037106207574558, "compression/movement_sparsity/linear_layer_sparsity": 0.008923748954488981, "compression/movement_sparsity/model_sparsity": 0.008617190725239468, "compression_loss": 12.861563682556152, "distillation_loss": 0.3240550756454468, "epoch": 1.04, "learning_rate": 4.2555246950851344e-05, "loss": 13.2581, "step": 1233, "task_loss": 1.088853120803833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12141444936164936, "compression/movement_sparsity/importance_threshold": -0.004026444841062426, "compression/movement_sparsity/linear_layer_sparsity": 0.009406260397874178, "compression/movement_sparsity/model_sparsity": 0.00908312641616556, "compression_loss": 13.11262321472168, "distillation_loss": 0.5148484110832214, "epoch": 1.04, "learning_rate": 4.254920903272552e-05, "loss": 13.6061, "step": 1234, "task_loss": 0.17603550851345062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12373670071502896, "compression/movement_sparsity/importance_threshold": -0.004015802261094349, "compression/movement_sparsity/linear_layer_sparsity": 0.009914134545820855, "compression/movement_sparsity/model_sparsity": 0.009573553524726643, "compression_loss": 13.363228797912598, "distillation_loss": 0.35552290081977844, "epoch": 1.04, "learning_rate": 4.254317111459969e-05, "loss": 13.7532, "step": 1235, "task_loss": 0.8817396759986877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1260548563880719, "compression/movement_sparsity/importance_threshold": -0.004005178451103709, "compression/movement_sparsity/linear_layer_sparsity": 0.010331385019734974, "compression/movement_sparsity/model_sparsity": 0.009976470161249213, "compression_loss": 13.613393783569336, "distillation_loss": 0.35389167070388794, "epoch": 1.04, "learning_rate": 4.253713319647385e-05, "loss": 13.965, "step": 1236, "task_loss": 1.0384838581085205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12836891999567668, "compression/movement_sparsity/importance_threshold": -0.0039945733945238905, "compression/movement_sparsity/linear_layer_sparsity": 0.010839283016016924, "compression/movement_sparsity/model_sparsity": 0.010466920298881884, "compression_loss": 13.863100051879883, "distillation_loss": 0.3721335530281067, "epoch": 1.05, "learning_rate": 4.2531095278348026e-05, "loss": 14.1951, "step": 1237, "task_loss": 0.7117844820022583 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13067889515274267, "compression/movement_sparsity/importance_threshold": -0.003983987074788274, "compression/movement_sparsity/linear_layer_sparsity": 0.011440654562396404, "compression/movement_sparsity/model_sparsity": 0.01104763288260796, "compression_loss": 14.112360000610352, "distillation_loss": 0.40425238013267517, "epoch": 1.05, "learning_rate": 4.25250573602222e-05, "loss": 14.5243, "step": 1238, "task_loss": 0.557571530342102 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13298478547416837, "compression/movement_sparsity/importance_threshold": -0.003973419475330242, "compression/movement_sparsity/linear_layer_sparsity": 0.011865739214447283, "compression/movement_sparsity/model_sparsity": 0.011458114569147545, "compression_loss": 14.361180305480957, "distillation_loss": 0.14665082097053528, "epoch": 1.05, "learning_rate": 4.251901944209637e-05, "loss": 14.8576, "step": 1239, "task_loss": 0.11613530665636063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13528659457485237, "compression/movement_sparsity/importance_threshold": -0.0039628705795831795, "compression/movement_sparsity/linear_layer_sparsity": 0.012378204166933767, "compression/movement_sparsity/model_sparsity": 0.011952974773989525, "compression_loss": 14.60955810546875, "distillation_loss": 0.6238508224487305, "epoch": 1.05, "learning_rate": 4.2512981523970535e-05, "loss": 15.0809, "step": 1240, "task_loss": 0.7568769454956055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13758432606969384, "compression/movement_sparsity/importance_threshold": -0.003952340370980467, "compression/movement_sparsity/linear_layer_sparsity": 0.012832669968039416, "compression/movement_sparsity/model_sparsity": 0.012391828276726859, "compression_loss": 14.857502937316895, "distillation_loss": 0.3697394132614136, "epoch": 1.05, "learning_rate": 4.250694360584471e-05, "loss": 15.2303, "step": 1241, "task_loss": 0.11848543584346771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1398779835735915, "compression/movement_sparsity/importance_threshold": -0.003941828832955487, "compression/movement_sparsity/linear_layer_sparsity": 0.01337165426934806, "compression/movement_sparsity/model_sparsity": 0.012912296809175897, "compression_loss": 15.104987144470215, "distillation_loss": 0.479628324508667, "epoch": 1.05, "learning_rate": 4.2500905687718876e-05, "loss": 15.4811, "step": 1242, "task_loss": 0.7740815877914429 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1421675707014437, "compression/movement_sparsity/importance_threshold": -0.003931335948941625, "compression/movement_sparsity/linear_layer_sparsity": 0.013911413641553034, "compression/movement_sparsity/model_sparsity": 0.01343351378645158, "compression_loss": 15.352028846740723, "distillation_loss": 0.6251940727233887, "epoch": 1.05, "learning_rate": 4.249486776959304e-05, "loss": 15.8209, "step": 1243, "task_loss": 1.2368892431259155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14445309106814996, "compression/movement_sparsity/importance_threshold": -0.00392086170237226, "compression/movement_sparsity/linear_layer_sparsity": 0.014624788894536174, "compression/movement_sparsity/model_sparsity": 0.014122382404895778, "compression_loss": 15.598628997802734, "distillation_loss": 0.2747795581817627, "epoch": 1.05, "learning_rate": 4.248882985146722e-05, "loss": 16.0519, "step": 1244, "task_loss": 0.8534819483757019 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14673454828860866, "compression/movement_sparsity/importance_threshold": -0.003910406076680776, "compression/movement_sparsity/linear_layer_sparsity": 0.01519466871418934, "compression/movement_sparsity/model_sparsity": 0.014672685099588472, "compression_loss": 15.8447904586792, "distillation_loss": 0.558049738407135, "epoch": 1.05, "learning_rate": 4.2482791933341384e-05, "loss": 16.3841, "step": 1245, "task_loss": 0.6528630256652832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1490119459777186, "compression/movement_sparsity/importance_threshold": -0.0038999690553005568, "compression/movement_sparsity/linear_layer_sparsity": 0.015880558760771817, "compression/movement_sparsity/model_sparsity": 0.015335012713026253, "compression_loss": 16.090503692626953, "distillation_loss": 0.5402536988258362, "epoch": 1.05, "learning_rate": 4.247675401521555e-05, "loss": 16.7181, "step": 1246, "task_loss": 0.7982296347618103 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15128528775037842, "compression/movement_sparsity/importance_threshold": -0.0038895506216649854, "compression/movement_sparsity/linear_layer_sparsity": 0.016527540248358473, "compression/movement_sparsity/model_sparsity": 0.015959768396166446, "compression_loss": 16.33578109741211, "distillation_loss": 0.4772404134273529, "epoch": 1.05, "learning_rate": 4.2470716097089725e-05, "loss": 17.0804, "step": 1247, "task_loss": 0.47390374541282654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15355457722148735, "compression/movement_sparsity/importance_threshold": -0.0038791507592074418, "compression/movement_sparsity/linear_layer_sparsity": 0.01707489531534749, "compression/movement_sparsity/model_sparsity": 0.016488320132743255, "compression_loss": 16.58063507080078, "distillation_loss": 0.755258321762085, "epoch": 1.05, "learning_rate": 4.246467817896389e-05, "loss": 17.1654, "step": 1248, "task_loss": 1.2012611627578735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15581981800594402, "compression/movement_sparsity/importance_threshold": -0.00386876945136131, "compression/movement_sparsity/linear_layer_sparsity": 0.017753547392175, "compression/movement_sparsity/model_sparsity": 0.017143658422953748, "compression_loss": 16.825054168701172, "distillation_loss": 0.7593815326690674, "epoch": 1.06, "learning_rate": 4.2458640260838067e-05, "loss": 17.2865, "step": 1249, "task_loss": 0.6256983876228333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1580810137186469, "compression/movement_sparsity/importance_threshold": -0.0038584066815599742, "compression/movement_sparsity/linear_layer_sparsity": 0.01842437721503338, "compression/movement_sparsity/model_sparsity": 0.017791443177683025, "compression_loss": 17.069021224975586, "distillation_loss": 0.3839413523674011, "epoch": 1.06, "learning_rate": 4.2452602342712234e-05, "loss": 17.6899, "step": 1250, "task_loss": 0.5304906368255615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1603381679744953, "compression/movement_sparsity/importance_threshold": -0.003848062433236815, "compression/movement_sparsity/linear_layer_sparsity": 0.018965639032360475, "compression/movement_sparsity/model_sparsity": 0.01831411098646882, "compression_loss": 17.312536239624023, "distillation_loss": 0.7868106365203857, "epoch": 1.06, "learning_rate": 4.244656442458641e-05, "loss": 17.913, "step": 1251, "task_loss": 1.0470547676086426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16259128438838777, "compression/movement_sparsity/importance_threshold": -0.0038377366898252156, "compression/movement_sparsity/linear_layer_sparsity": 0.01958477758851739, "compression/movement_sparsity/model_sparsity": 0.01891198022852876, "compression_loss": 17.555635452270508, "distillation_loss": 0.5118089914321899, "epoch": 1.06, "learning_rate": 4.2440526506460575e-05, "loss": 18.0005, "step": 1252, "task_loss": 1.5048199892044067 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16484036657522272, "compression/movement_sparsity/importance_threshold": -0.0038274294347585606, "compression/movement_sparsity/linear_layer_sparsity": 0.020287075289766813, "compression/movement_sparsity/model_sparsity": 0.01959015184321983, "compression_loss": 17.798269271850586, "distillation_loss": 0.4211500883102417, "epoch": 1.06, "learning_rate": 4.243448858833474e-05, "loss": 18.3106, "step": 1253, "task_loss": 0.9412956833839417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16708541814989974, "compression/movement_sparsity/importance_threshold": -0.0038171406514702294, "compression/movement_sparsity/linear_layer_sparsity": 0.020857336682784326, "compression/movement_sparsity/model_sparsity": 0.02014082300305795, "compression_loss": 18.04047203063965, "distillation_loss": 0.40711507201194763, "epoch": 1.06, "learning_rate": 4.2428450670208916e-05, "loss": 18.5116, "step": 1254, "task_loss": 0.7414783835411072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16932644272731723, "compression/movement_sparsity/importance_threshold": -0.0038068703233936057, "compression/movement_sparsity/linear_layer_sparsity": 0.021520511190020485, "compression/movement_sparsity/model_sparsity": 0.02078121542580713, "compression_loss": 18.2822208404541, "distillation_loss": 0.17223873734474182, "epoch": 1.06, "learning_rate": 4.242241275208308e-05, "loss": 18.7306, "step": 1255, "task_loss": 0.787796139717102 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17156344392237388, "compression/movement_sparsity/importance_threshold": -0.0037966184339620736, "compression/movement_sparsity/linear_layer_sparsity": 0.02209667504601775, "compression/movement_sparsity/model_sparsity": 0.021337586280863546, "compression_loss": 18.523513793945312, "distillation_loss": 0.46004176139831543, "epoch": 1.06, "learning_rate": 4.241637483395725e-05, "loss": 18.9812, "step": 1256, "task_loss": 0.41924434900283813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17379642534996842, "compression/movement_sparsity/importance_threshold": -0.0037863849666090153, "compression/movement_sparsity/linear_layer_sparsity": 0.022979480796938866, "compression/movement_sparsity/model_sparsity": 0.022190064938412375, "compression_loss": 18.764373779296875, "distillation_loss": 0.6463733911514282, "epoch": 1.06, "learning_rate": 4.2410336915831424e-05, "loss": 19.1834, "step": 1257, "task_loss": 0.8667296767234802 }, { "compression/movement_sparsity/importance_regularization_factor": 0.176025390625, "compression/movement_sparsity/importance_threshold": -0.003776169904767812, "compression/movement_sparsity/linear_layer_sparsity": 0.02376077610877588, "compression/movement_sparsity/model_sparsity": 0.022944520352742276, "compression_loss": 19.004806518554688, "distillation_loss": 0.3770235478878021, "epoch": 1.06, "learning_rate": 4.240429899770559e-05, "loss": 19.4146, "step": 1258, "task_loss": 0.7750553488731384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17825034336236723, "compression/movement_sparsity/importance_threshold": -0.003765973231871848, "compression/movement_sparsity/linear_layer_sparsity": 0.02436214765515536, "compression/movement_sparsity/model_sparsity": 0.023525232936468354, "compression_loss": 19.244800567626953, "distillation_loss": 0.25710105895996094, "epoch": 1.06, "learning_rate": 4.2398261079579765e-05, "loss": 19.5277, "step": 1259, "task_loss": 0.054146286100149155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1804712871769687, "compression/movement_sparsity/importance_threshold": -0.0037557949313545055, "compression/movement_sparsity/linear_layer_sparsity": 0.025171488609271927, "compression/movement_sparsity/model_sparsity": 0.024306770538987017, "compression_loss": 19.484344482421875, "distillation_loss": 0.4451633393764496, "epoch": 1.07, "learning_rate": 4.239222316145393e-05, "loss": 19.8566, "step": 1260, "task_loss": 0.5631954669952393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18268822568370358, "compression/movement_sparsity/importance_threshold": -0.0037456349866491667, "compression/movement_sparsity/linear_layer_sparsity": 0.026060554548201872, "compression/movement_sparsity/model_sparsity": 0.025165294327827978, "compression_loss": 19.723426818847656, "distillation_loss": 0.4827326536178589, "epoch": 1.07, "learning_rate": 4.23861852433281e-05, "loss": 20.1665, "step": 1261, "task_loss": 0.8670202493667603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18490116249747057, "compression/movement_sparsity/importance_threshold": -0.003735493381189214, "compression/movement_sparsity/linear_layer_sparsity": 0.026663845885570726, "compression/movement_sparsity/model_sparsity": 0.025747860751816974, "compression_loss": 19.96208381652832, "distillation_loss": 0.27411961555480957, "epoch": 1.07, "learning_rate": 4.2380147325202274e-05, "loss": 20.4008, "step": 1262, "task_loss": 0.3769223988056183 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1871101012331684, "compression/movement_sparsity/importance_threshold": -0.0037253700984080313, "compression/movement_sparsity/linear_layer_sparsity": 0.027493589090512256, "compression/movement_sparsity/model_sparsity": 0.026549099725080096, "compression_loss": 20.20026969909668, "distillation_loss": 0.3444983959197998, "epoch": 1.07, "learning_rate": 4.237410940707644e-05, "loss": 20.6702, "step": 1263, "task_loss": 0.28103652596473694 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18931504550569556, "compression/movement_sparsity/importance_threshold": -0.0037152651217390014, "compression/movement_sparsity/linear_layer_sparsity": 0.028388569416589588, "compression/movement_sparsity/model_sparsity": 0.02741333472367515, "compression_loss": 20.438045501708984, "distillation_loss": 0.5734044313430786, "epoch": 1.07, "learning_rate": 4.236807148895061e-05, "loss": 20.9796, "step": 1264, "task_loss": 0.8202968835830688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19151599892995141, "compression/movement_sparsity/importance_threshold": -0.0037051784346155054, "compression/movement_sparsity/linear_layer_sparsity": 0.029251545276732266, "compression/movement_sparsity/model_sparsity": 0.028246664708197654, "compression_loss": 20.675390243530273, "distillation_loss": 0.275103360414505, "epoch": 1.07, "learning_rate": 4.236203357082478e-05, "loss": 20.9951, "step": 1265, "task_loss": 0.4522629380226135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19371296512083447, "compression/movement_sparsity/importance_threshold": -0.0036951100204709268, "compression/movement_sparsity/linear_layer_sparsity": 0.030180127907207463, "compression/movement_sparsity/model_sparsity": 0.029143347668661727, "compression_loss": 20.912269592285156, "distillation_loss": 0.34891802072525024, "epoch": 1.07, "learning_rate": 4.235599565269895e-05, "loss": 21.3909, "step": 1266, "task_loss": 0.9437047243118286 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19590594769324332, "compression/movement_sparsity/importance_threshold": -0.003685059862738649, "compression/movement_sparsity/linear_layer_sparsity": 0.031078005806020312, "compression/movement_sparsity/model_sparsity": 0.03001038069945485, "compression_loss": 21.148706436157227, "distillation_loss": 0.386344313621521, "epoch": 1.07, "learning_rate": 4.234995773457312e-05, "loss": 21.666, "step": 1267, "task_loss": 1.27640962600708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19809495026207724, "compression/movement_sparsity/importance_threshold": -0.003675027944852053, "compression/movement_sparsity/linear_layer_sparsity": 0.03179729544615084, "compression/movement_sparsity/model_sparsity": 0.030704960527653143, "compression_loss": 21.384714126586914, "distillation_loss": 0.31338009238243103, "epoch": 1.07, "learning_rate": 4.234391981644729e-05, "loss": 21.8774, "step": 1268, "task_loss": 0.7733809947967529 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20027997644223483, "compression/movement_sparsity/importance_threshold": -0.003665014250244522, "compression/movement_sparsity/linear_layer_sparsity": 0.03276843543091843, "compression/movement_sparsity/model_sparsity": 0.031642738866367925, "compression_loss": 21.620277404785156, "distillation_loss": 0.5215176939964294, "epoch": 1.07, "learning_rate": 4.2337881898321464e-05, "loss": 21.9934, "step": 1269, "task_loss": 0.8034562468528748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20246102984861447, "compression/movement_sparsity/importance_threshold": -0.003655018762349441, "compression/movement_sparsity/linear_layer_sparsity": 0.03372277426348709, "compression/movement_sparsity/model_sparsity": 0.03256429322414821, "compression_loss": 21.855430603027344, "distillation_loss": 0.6203305721282959, "epoch": 1.07, "learning_rate": 4.233184398019563e-05, "loss": 22.2194, "step": 1270, "task_loss": 0.571286678314209 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20463811409611565, "compression/movement_sparsity/importance_threshold": -0.003645041464600189, "compression/movement_sparsity/linear_layer_sparsity": 0.03460077457485096, "compression/movement_sparsity/model_sparsity": 0.033412131523771836, "compression_loss": 22.090139389038086, "distillation_loss": 0.4532524645328522, "epoch": 1.07, "learning_rate": 4.23258060620698e-05, "loss": 22.4761, "step": 1271, "task_loss": 0.47811877727508545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20681123279963676, "compression/movement_sparsity/importance_threshold": -0.0036350823404301506, "compression/movement_sparsity/linear_layer_sparsity": 0.03550761944772598, "compression/movement_sparsity/model_sparsity": 0.034287823485482455, "compression_loss": 22.32438850402832, "distillation_loss": 0.36718758940696716, "epoch": 1.08, "learning_rate": 4.231976814394397e-05, "loss": 22.6763, "step": 1272, "task_loss": 0.5815104842185974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2089803895740765, "compression/movement_sparsity/importance_threshold": -0.003625141373272709, "compression/movement_sparsity/linear_layer_sparsity": 0.036544366202826084, "compression/movement_sparsity/model_sparsity": 0.03528895480013881, "compression_loss": 22.558198928833008, "distillation_loss": 0.30269962549209595, "epoch": 1.08, "learning_rate": 4.231373022581814e-05, "loss": 22.9624, "step": 1273, "task_loss": 0.7353211045265198 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21114558803433403, "compression/movement_sparsity/importance_threshold": -0.0036152185465612454, "compression/movement_sparsity/linear_layer_sparsity": 0.03735675974385743, "compression/movement_sparsity/model_sparsity": 0.03607344012382087, "compression_loss": 22.791576385498047, "distillation_loss": 0.47509849071502686, "epoch": 1.08, "learning_rate": 4.230769230769231e-05, "loss": 23.3483, "step": 1274, "task_loss": 1.5420186519622803 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21330683179530796, "compression/movement_sparsity/importance_threshold": -0.0036053138437291427, "compression/movement_sparsity/linear_layer_sparsity": 0.038425367874880566, "compression/movement_sparsity/model_sparsity": 0.03710533827812023, "compression_loss": 23.024497985839844, "distillation_loss": 0.2879154086112976, "epoch": 1.08, "learning_rate": 4.230165438956648e-05, "loss": 23.5338, "step": 1275, "task_loss": 0.4672480821609497 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2154641244718971, "compression/movement_sparsity/importance_threshold": -0.0035954272482097843, "compression/movement_sparsity/linear_layer_sparsity": 0.03933080569597455, "compression/movement_sparsity/model_sparsity": 0.0379796715246071, "compression_loss": 23.257001876831055, "distillation_loss": 0.5717579126358032, "epoch": 1.08, "learning_rate": 4.229561647144065e-05, "loss": 23.8107, "step": 1276, "task_loss": 0.42080381512641907 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21761746967899998, "compression/movement_sparsity/importance_threshold": -0.003585558743436554, "compression/movement_sparsity/linear_layer_sparsity": 0.04032482815843537, "compression/movement_sparsity/model_sparsity": 0.03893954625751161, "compression_loss": 23.489076614379883, "distillation_loss": 0.324224054813385, "epoch": 1.08, "learning_rate": 4.2289578553314815e-05, "loss": 23.8813, "step": 1277, "task_loss": 0.3293912708759308 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21976687103151593, "compression/movement_sparsity/importance_threshold": -0.003575708312842832, "compression/movement_sparsity/linear_layer_sparsity": 0.041313711304645125, "compression/movement_sparsity/model_sparsity": 0.03989445822548867, "compression_loss": 23.7206974029541, "distillation_loss": 0.49534380435943604, "epoch": 1.08, "learning_rate": 4.228354063518899e-05, "loss": 24.2039, "step": 1278, "task_loss": 0.7115806341171265 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22191233214434336, "compression/movement_sparsity/importance_threshold": -0.003565875939862002, "compression/movement_sparsity/linear_layer_sparsity": 0.04256684592983324, "compression/movement_sparsity/model_sparsity": 0.04110454382120855, "compression_loss": 23.951908111572266, "distillation_loss": 0.3860517740249634, "epoch": 1.08, "learning_rate": 4.227750271706316e-05, "loss": 24.4659, "step": 1279, "task_loss": 0.05055927112698555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2240538566323813, "compression/movement_sparsity/importance_threshold": -0.0035560616079274463, "compression/movement_sparsity/linear_layer_sparsity": 0.043661234111285106, "compression/movement_sparsity/model_sparsity": 0.04216133640189571, "compression_loss": 24.182682037353516, "distillation_loss": 0.47242969274520874, "epoch": 1.08, "learning_rate": 4.2271464798937324e-05, "loss": 24.5765, "step": 1280, "task_loss": 0.8867209553718567 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22619144811052805, "compression/movement_sparsity/importance_threshold": -0.0035462653004725495, "compression/movement_sparsity/linear_layer_sparsity": 0.0446983743639172, "compression/movement_sparsity/model_sparsity": 0.043162847696233284, "compression_loss": 24.41301727294922, "distillation_loss": 0.31775909662246704, "epoch": 1.08, "learning_rate": 4.22654268808115e-05, "loss": 24.8527, "step": 1281, "task_loss": 0.5015381574630737 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22832511019368307, "compression/movement_sparsity/importance_threshold": -0.0035364870009306913, "compression/movement_sparsity/linear_layer_sparsity": 0.045698311213525404, "compression/movement_sparsity/model_sparsity": 0.044128433638891885, "compression_loss": 24.642908096313477, "distillation_loss": 0.6811873912811279, "epoch": 1.08, "learning_rate": 4.225938896268567e-05, "loss": 25.1448, "step": 1282, "task_loss": 1.4205913543701172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23045484649674486, "compression/movement_sparsity/importance_threshold": -0.0035267266927352563, "compression/movement_sparsity/linear_layer_sparsity": 0.046521579595440656, "compression/movement_sparsity/model_sparsity": 0.044923420219218566, "compression_loss": 24.872400283813477, "distillation_loss": 0.5138095021247864, "epoch": 1.08, "learning_rate": 4.225335104455984e-05, "loss": 25.4367, "step": 1283, "task_loss": 0.3787994086742401 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2325806606346119, "compression/movement_sparsity/importance_threshold": -0.0035169843593196278, "compression/movement_sparsity/linear_layer_sparsity": 0.047665369603407906, "compression/movement_sparsity/model_sparsity": 0.04602791752170251, "compression_loss": 25.10146713256836, "distillation_loss": 0.3439091444015503, "epoch": 1.09, "learning_rate": 4.2247313126434006e-05, "loss": 25.8084, "step": 1284, "task_loss": 0.3350301682949066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2347025562221836, "compression/movement_sparsity/importance_threshold": -0.0035072599841171857, "compression/movement_sparsity/linear_layer_sparsity": 0.04863934754007283, "compression/movement_sparsity/model_sparsity": 0.0469684363199364, "compression_loss": 25.330123901367188, "distillation_loss": 0.43416571617126465, "epoch": 1.09, "learning_rate": 4.224127520830818e-05, "loss": 25.846, "step": 1285, "task_loss": 0.9758123755455017 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23682053687435844, "compression/movement_sparsity/importance_threshold": -0.003497553550561315, "compression/movement_sparsity/linear_layer_sparsity": 0.049807176669993986, "compression/movement_sparsity/model_sparsity": 0.04809614692658214, "compression_loss": 25.5583553314209, "distillation_loss": 0.426790714263916, "epoch": 1.09, "learning_rate": 4.223523729018235e-05, "loss": 26.1859, "step": 1286, "task_loss": 0.50923091173172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23893460620603502, "compression/movement_sparsity/importance_threshold": -0.0034878650420853982, "compression/movement_sparsity/linear_layer_sparsity": 0.0509383866811054, "compression/movement_sparsity/model_sparsity": 0.049188496393802836, "compression_loss": 25.78615379333496, "distillation_loss": 0.4497114419937134, "epoch": 1.09, "learning_rate": 4.2229199372056514e-05, "loss": 26.4702, "step": 1287, "task_loss": 0.10824604332447052 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2410447678321126, "compression/movement_sparsity/importance_threshold": -0.003478194442122817, "compression/movement_sparsity/linear_layer_sparsity": 0.05214244143230431, "compression/movement_sparsity/model_sparsity": 0.050351188160192385, "compression_loss": 26.01352310180664, "distillation_loss": 0.5232141017913818, "epoch": 1.09, "learning_rate": 4.222316145393069e-05, "loss": 26.5067, "step": 1288, "task_loss": 0.949741780757904 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24315102536748967, "compression/movement_sparsity/importance_threshold": -0.0034685417341069544, "compression/movement_sparsity/linear_layer_sparsity": 0.05339402591569976, "compression/movement_sparsity/model_sparsity": 0.051559776866258976, "compression_loss": 26.240480422973633, "distillation_loss": 0.3940143585205078, "epoch": 1.09, "learning_rate": 4.221712353580486e-05, "loss": 26.851, "step": 1289, "task_loss": 0.2445281744003296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24525338242706485, "compression/movement_sparsity/importance_threshold": -0.003458906901471194, "compression/movement_sparsity/linear_layer_sparsity": 0.054519130752981614, "compression/movement_sparsity/model_sparsity": 0.052646230891152876, "compression_loss": 26.467012405395508, "distillation_loss": 0.29383352398872375, "epoch": 1.09, "learning_rate": 4.221108561767902e-05, "loss": 26.9849, "step": 1290, "task_loss": 0.4865871071815491 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2473518426257375, "compression/movement_sparsity/importance_threshold": -0.0034492899276489164, "compression/movement_sparsity/linear_layer_sparsity": 0.055676120814521765, "compression/movement_sparsity/model_sparsity": 0.05376347478476137, "compression_loss": 26.693098068237305, "distillation_loss": 0.3016450107097626, "epoch": 1.09, "learning_rate": 4.22050476995532e-05, "loss": 27.1098, "step": 1291, "task_loss": 0.7388697862625122 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24944640957840603, "compression/movement_sparsity/importance_threshold": -0.003439690796073506, "compression/movement_sparsity/linear_layer_sparsity": 0.056808487469893865, "compression/movement_sparsity/model_sparsity": 0.054856941161954144, "compression_loss": 26.918764114379883, "distillation_loss": 0.6567619442939758, "epoch": 1.09, "learning_rate": 4.219900978142737e-05, "loss": 27.5956, "step": 1292, "task_loss": 0.7037306427955627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.25153708689996934, "compression/movement_sparsity/importance_threshold": -0.0034301094901783447, "compression/movement_sparsity/linear_layer_sparsity": 0.0579045927314853, "compression/movement_sparsity/model_sparsity": 0.05591539183579572, "compression_loss": 27.144041061401367, "distillation_loss": 0.5370960235595703, "epoch": 1.09, "learning_rate": 4.219297186330153e-05, "loss": 27.6641, "step": 1293, "task_loss": 0.41341301798820496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.25362387820532584, "compression/movement_sparsity/importance_threshold": -0.003420545993396817, "compression/movement_sparsity/linear_layer_sparsity": 0.05917929817592669, "compression/movement_sparsity/model_sparsity": 0.057146307226767924, "compression_loss": 27.368873596191406, "distillation_loss": 0.3748165965080261, "epoch": 1.09, "learning_rate": 4.2186933945175705e-05, "loss": 27.7874, "step": 1294, "task_loss": 0.5658988356590271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.255706787109375, "compression/movement_sparsity/importance_threshold": -0.0034110002891623026, "compression/movement_sparsity/linear_layer_sparsity": 0.06037460058808087, "compression/movement_sparsity/model_sparsity": 0.05830054732388428, "compression_loss": 27.59325408935547, "distillation_loss": 0.44830477237701416, "epoch": 1.09, "learning_rate": 4.218089602704988e-05, "loss": 28.1787, "step": 1295, "task_loss": 0.46517813205718994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2577858172270152, "compression/movement_sparsity/importance_threshold": -0.003401472360908186, "compression/movement_sparsity/linear_layer_sparsity": 0.06175476337109383, "compression/movement_sparsity/model_sparsity": 0.05963329726942341, "compression_loss": 27.817203521728516, "distillation_loss": 0.5405295491218567, "epoch": 1.1, "learning_rate": 4.217485810892404e-05, "loss": 28.2649, "step": 1296, "task_loss": 0.36690667271614075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2598609721731452, "compression/movement_sparsity/importance_threshold": -0.00339196219206785, "compression/movement_sparsity/linear_layer_sparsity": 0.06294416332026827, "compression/movement_sparsity/model_sparsity": 0.06078183767132147, "compression_loss": 28.04070472717285, "distillation_loss": 0.5979585647583008, "epoch": 1.1, "learning_rate": 4.216882019079821e-05, "loss": 28.4644, "step": 1297, "task_loss": 0.4105743169784546 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2619322555626641, "compression/movement_sparsity/importance_threshold": -0.003382469766074676, "compression/movement_sparsity/linear_layer_sparsity": 0.06421867797802748, "compression/movement_sparsity/model_sparsity": 0.06201256882972096, "compression_loss": 28.2637939453125, "distillation_loss": 0.5472410917282104, "epoch": 1.1, "learning_rate": 4.216278227267239e-05, "loss": 28.5986, "step": 1298, "task_loss": 0.234819695353508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2639996710104705, "compression/movement_sparsity/importance_threshold": -0.003372995066362047, "compression/movement_sparsity/linear_layer_sparsity": 0.06535296442438895, "compression/movement_sparsity/model_sparsity": 0.06310788904717665, "compression_loss": 28.486465454101562, "distillation_loss": 0.5468919277191162, "epoch": 1.1, "learning_rate": 4.2156744354546554e-05, "loss": 28.9692, "step": 1299, "task_loss": 0.681564450263977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2660632221314633, "compression/movement_sparsity/importance_threshold": -0.0033635380763633463, "compression/movement_sparsity/linear_layer_sparsity": 0.06668847119960561, "compression/movement_sparsity/model_sparsity": 0.06439751705616523, "compression_loss": 28.708694458007812, "distillation_loss": 0.5030103921890259, "epoch": 1.1, "learning_rate": 4.215070643642072e-05, "loss": 29.1958, "step": 1300, "task_loss": 0.3571729063987732 }, { "compression/movement_sparsity/importance_regularization_factor": 0.26812291254054077, "compression/movement_sparsity/importance_threshold": -0.0033540987795119583, "compression/movement_sparsity/linear_layer_sparsity": 0.06797363413906365, "compression/movement_sparsity/model_sparsity": 0.06563853069502924, "compression_loss": 28.930509567260742, "distillation_loss": 0.5750807523727417, "epoch": 1.1, "learning_rate": 4.2144668518294896e-05, "loss": 29.4828, "step": 1301, "task_loss": 0.47699689865112305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2701787458526025, "compression/movement_sparsity/importance_threshold": -0.0033446771592412616, "compression/movement_sparsity/linear_layer_sparsity": 0.06913210279739065, "compression/movement_sparsity/model_sparsity": 0.06675720239107626, "compression_loss": 29.151933670043945, "distillation_loss": 1.6764183044433594, "epoch": 1.1, "learning_rate": 4.213863060016906e-05, "loss": 30.3527, "step": 1302, "task_loss": 2.7316722869873047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.272230725682547, "compression/movement_sparsity/importance_threshold": -0.003335273198984641, "compression/movement_sparsity/linear_layer_sparsity": 0.07039941525789781, "compression/movement_sparsity/model_sparsity": 0.06798097876985586, "compression_loss": 29.372962951660156, "distillation_loss": 1.0377256870269775, "epoch": 1.1, "learning_rate": 4.213259268204323e-05, "loss": 30.3015, "step": 1303, "task_loss": 0.7121099829673767 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27427885564527266, "compression/movement_sparsity/importance_threshold": -0.003325886882175481, "compression/movement_sparsity/linear_layer_sparsity": 0.07153860253715762, "compression/movement_sparsity/model_sparsity": 0.0690810314615231, "compression_loss": 29.59358787536621, "distillation_loss": 0.5788123607635498, "epoch": 1.1, "learning_rate": 4.2126554763917404e-05, "loss": 30.2963, "step": 1304, "task_loss": 1.2201249599456787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27632313935567887, "compression/movement_sparsity/importance_threshold": -0.003316518192247161, "compression/movement_sparsity/linear_layer_sparsity": 0.07267762287807052, "compression/movement_sparsity/model_sparsity": 0.07018092294968922, "compression_loss": 29.81377601623535, "distillation_loss": 0.4418812394142151, "epoch": 1.1, "learning_rate": 4.212051684579157e-05, "loss": 30.3032, "step": 1305, "task_loss": 1.03061842918396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27836358042866427, "compression/movement_sparsity/importance_threshold": -0.003307167112633064, "compression/movement_sparsity/linear_layer_sparsity": 0.0738353522380041, "compression/movement_sparsity/model_sparsity": 0.07129888074451698, "compression_loss": 30.033525466918945, "distillation_loss": 0.4356471598148346, "epoch": 1.1, "learning_rate": 4.211447892766574e-05, "loss": 30.3986, "step": 1306, "task_loss": 0.6703677177429199 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2804001824791271, "compression/movement_sparsity/importance_threshold": -0.0032978336267665766, "compression/movement_sparsity/linear_layer_sparsity": 0.07538897588761596, "compression/movement_sparsity/model_sparsity": 0.0727991326422593, "compression_loss": 30.25279426574707, "distillation_loss": 0.6686887741088867, "epoch": 1.1, "learning_rate": 4.210844100953991e-05, "loss": 30.9003, "step": 1307, "task_loss": 1.5936065912246704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.28243294912196704, "compression/movement_sparsity/importance_threshold": -0.0032885177180810767, "compression/movement_sparsity/linear_layer_sparsity": 0.07678862276054592, "compression/movement_sparsity/model_sparsity": 0.0741506973392867, "compression_loss": 30.471668243408203, "distillation_loss": 0.555501401424408, "epoch": 1.11, "learning_rate": 4.2102403091414086e-05, "loss": 31.0259, "step": 1308, "task_loss": 0.35147160291671753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2844618839720825, "compression/movement_sparsity/importance_threshold": -0.0032792193700099488, "compression/movement_sparsity/linear_layer_sparsity": 0.07801583424529368, "compression/movement_sparsity/model_sparsity": 0.07533575033418925, "compression_loss": 30.690101623535156, "distillation_loss": 0.315677285194397, "epoch": 1.11, "learning_rate": 4.209636517328825e-05, "loss": 31.2009, "step": 1309, "task_loss": 0.39304590225219727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2864869906443719, "compression/movement_sparsity/importance_threshold": -0.003269938565986577, "compression/movement_sparsity/linear_layer_sparsity": 0.07928921610712748, "compression/movement_sparsity/model_sparsity": 0.07656538761168827, "compression_loss": 30.90812110900879, "distillation_loss": 0.6151688694953918, "epoch": 1.11, "learning_rate": 4.209032725516242e-05, "loss": 31.4699, "step": 1310, "task_loss": 0.6047874093055725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2885082727537346, "compression/movement_sparsity/importance_threshold": -0.003260675289444341, "compression/movement_sparsity/linear_layer_sparsity": 0.08062423399147108, "compression/movement_sparsity/model_sparsity": 0.07785454352470926, "compression_loss": 31.125713348388672, "distillation_loss": 0.5884499549865723, "epoch": 1.11, "learning_rate": 4.2084289337036595e-05, "loss": 31.7647, "step": 1311, "task_loss": 0.6690028309822083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2905257339150692, "compression/movement_sparsity/importance_threshold": -0.0032514295238166256, "compression/movement_sparsity/linear_layer_sparsity": 0.08199393927946738, "compression/movement_sparsity/model_sparsity": 0.07917719522235657, "compression_loss": 31.342893600463867, "distillation_loss": 0.6261318325996399, "epoch": 1.11, "learning_rate": 4.207825141891076e-05, "loss": 31.8404, "step": 1312, "task_loss": 0.6102679371833801 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29253937774327454, "compression/movement_sparsity/importance_threshold": -0.0032422012525368128, "compression/movement_sparsity/linear_layer_sparsity": 0.08356951532169687, "compression/movement_sparsity/model_sparsity": 0.08069864538049662, "compression_loss": 31.559642791748047, "distillation_loss": 0.4630987048149109, "epoch": 1.11, "learning_rate": 4.207221350078493e-05, "loss": 31.9321, "step": 1313, "task_loss": 1.411203145980835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.294549207853249, "compression/movement_sparsity/importance_threshold": -0.003232990459038287, "compression/movement_sparsity/linear_layer_sparsity": 0.08496195999737477, "compression/movement_sparsity/model_sparsity": 0.08204325529790413, "compression_loss": 31.77594566345215, "distillation_loss": 0.7190670967102051, "epoch": 1.11, "learning_rate": 4.20661755826591e-05, "loss": 32.3696, "step": 1314, "task_loss": 0.6938521862030029 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29655522785989197, "compression/movement_sparsity/importance_threshold": -0.0032237971267544277, "compression/movement_sparsity/linear_layer_sparsity": 0.08648111087835136, "compression/movement_sparsity/model_sparsity": 0.08351021867266442, "compression_loss": 31.991853713989258, "distillation_loss": 0.3729756772518158, "epoch": 1.11, "learning_rate": 4.206013766453327e-05, "loss": 32.4918, "step": 1315, "task_loss": 1.2942397594451904 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29855744137810203, "compression/movement_sparsity/importance_threshold": -0.0032146212391186194, "compression/movement_sparsity/linear_layer_sparsity": 0.08799527745725615, "compression/movement_sparsity/model_sparsity": 0.0849723689714626, "compression_loss": 32.207332611083984, "distillation_loss": 0.45549535751342773, "epoch": 1.11, "learning_rate": 4.205409974640744e-05, "loss": 32.6774, "step": 1316, "task_loss": 0.4734266400337219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.300555852022778, "compression/movement_sparsity/importance_threshold": -0.003205462779564244, "compression/movement_sparsity/linear_layer_sparsity": 0.08955389733310744, "compression/movement_sparsity/model_sparsity": 0.08647744545970282, "compression_loss": 32.42241287231445, "distillation_loss": 0.5190247297286987, "epoch": 1.11, "learning_rate": 4.204806182828161e-05, "loss": 32.9471, "step": 1317, "task_loss": 0.44229868054389954 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3025504634088183, "compression/movement_sparsity/importance_threshold": -0.0031963217315246865, "compression/movement_sparsity/linear_layer_sparsity": 0.0910788910562256, "compression/movement_sparsity/model_sparsity": 0.08795005095700244, "compression_loss": 32.63706588745117, "distillation_loss": 0.3707108497619629, "epoch": 1.11, "learning_rate": 4.204202391015578e-05, "loss": 33.2683, "step": 1318, "task_loss": 0.3607706129550934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3045412791511223, "compression/movement_sparsity/importance_threshold": -0.0031871980784333267, "compression/movement_sparsity/linear_layer_sparsity": 0.09247149074608277, "compression/movement_sparsity/model_sparsity": 0.08929481056337527, "compression_loss": 32.851295471191406, "distillation_loss": 0.5315563082695007, "epoch": 1.11, "learning_rate": 4.203598599202995e-05, "loss": 33.3456, "step": 1319, "task_loss": 0.38914695382118225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.30652830286458854, "compression/movement_sparsity/importance_threshold": -0.003178091803723548, "compression/movement_sparsity/linear_layer_sparsity": 0.09392456981418904, "compression/movement_sparsity/model_sparsity": 0.090697971895298, "compression_loss": 33.06510543823242, "distillation_loss": 0.5237348079681396, "epoch": 1.12, "learning_rate": 4.202994807390412e-05, "loss": 33.7713, "step": 1320, "task_loss": 0.9074419736862183 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3085115381641157, "compression/movement_sparsity/importance_threshold": -0.0031690028908287343, "compression/movement_sparsity/linear_layer_sparsity": 0.0953713648459512, "compression/movement_sparsity/model_sparsity": 0.09209506506685702, "compression_loss": 33.278507232666016, "distillation_loss": 0.8387680053710938, "epoch": 1.12, "learning_rate": 4.202391015577829e-05, "loss": 33.8797, "step": 1321, "task_loss": 0.5010203123092651 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3104909886646028, "compression/movement_sparsity/importance_threshold": -0.003159931323182267, "compression/movement_sparsity/linear_layer_sparsity": 0.0969221386194981, "compression/movement_sparsity/model_sparsity": 0.09359256499054444, "compression_loss": 33.491485595703125, "distillation_loss": 0.6050524115562439, "epoch": 1.12, "learning_rate": 4.201787223765246e-05, "loss": 34.2009, "step": 1322, "task_loss": 1.4805500507354736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31246665798094864, "compression/movement_sparsity/importance_threshold": -0.003150877084217528, "compression/movement_sparsity/linear_layer_sparsity": 0.09860265926109081, "compression/movement_sparsity/model_sparsity": 0.09521535457821224, "compression_loss": 33.70405578613281, "distillation_loss": 0.5349466800689697, "epoch": 1.12, "learning_rate": 4.201183431952663e-05, "loss": 34.2746, "step": 1323, "task_loss": 0.6244620680809021 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3144385497280515, "compression/movement_sparsity/importance_threshold": -0.0031418401573679033, "compression/movement_sparsity/linear_layer_sparsity": 0.10007902622858991, "compression/movement_sparsity/model_sparsity": 0.09664100379854172, "compression_loss": 33.916236877441406, "distillation_loss": 0.2783386707305908, "epoch": 1.12, "learning_rate": 4.20057964014008e-05, "loss": 34.2905, "step": 1324, "task_loss": 0.7754218578338623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31640666752081104, "compression/movement_sparsity/importance_threshold": -0.003132820526066772, "compression/movement_sparsity/linear_layer_sparsity": 0.10171260142220025, "compression/movement_sparsity/model_sparsity": 0.09821846065878644, "compression_loss": 34.12797546386719, "distillation_loss": 0.5289784669876099, "epoch": 1.12, "learning_rate": 4.199975848327497e-05, "loss": 34.6002, "step": 1325, "task_loss": 0.5593292713165283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31837101497412545, "compression/movement_sparsity/importance_threshold": -0.003123818173747518, "compression/movement_sparsity/linear_layer_sparsity": 0.10347095110595224, "compression/movement_sparsity/model_sparsity": 0.09991640562158521, "compression_loss": 34.33928298950195, "distillation_loss": 0.4046425521373749, "epoch": 1.12, "learning_rate": 4.1993720565149136e-05, "loss": 34.9293, "step": 1326, "task_loss": 0.5968723297119141 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3203315957028935, "compression/movement_sparsity/importance_threshold": -0.0031148330838435254, "compression/movement_sparsity/linear_layer_sparsity": 0.10494576793165869, "compression/movement_sparsity/model_sparsity": 0.1013405579522614, "compression_loss": 34.55019760131836, "distillation_loss": 0.8296120166778564, "epoch": 1.12, "learning_rate": 4.198768264702331e-05, "loss": 35.1826, "step": 1327, "task_loss": 0.3156295716762543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3222884133220144, "compression/movement_sparsity/importance_threshold": -0.0031058652397881743, "compression/movement_sparsity/linear_layer_sparsity": 0.1064096025989725, "compression/movement_sparsity/model_sparsity": 0.10275410539547082, "compression_loss": 34.76069641113281, "distillation_loss": 0.16450577974319458, "epoch": 1.12, "learning_rate": 4.198164472889748e-05, "loss": 35.1114, "step": 1328, "task_loss": 0.2194129377603531 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3242414714463868, "compression/movement_sparsity/importance_threshold": -0.003096914625014848, "compression/movement_sparsity/linear_layer_sparsity": 0.1081530232248444, "compression/movement_sparsity/model_sparsity": 0.10443763415945484, "compression_loss": 34.97080612182617, "distillation_loss": 0.5031836628913879, "epoch": 1.12, "learning_rate": 4.197560681077165e-05, "loss": 35.6032, "step": 1329, "task_loss": 0.10239604860544205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3261907736909093, "compression/movement_sparsity/importance_threshold": -0.003087981222956931, "compression/movement_sparsity/linear_layer_sparsity": 0.1098598724285771, "compression/movement_sparsity/model_sparsity": 0.106085847842157, "compression_loss": 35.18047332763672, "distillation_loss": 0.34471455216407776, "epoch": 1.12, "learning_rate": 4.196956889264582e-05, "loss": 35.7101, "step": 1330, "task_loss": 0.8245468735694885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3281363236704806, "compression/movement_sparsity/importance_threshold": -0.0030790650170478056, "compression/movement_sparsity/linear_layer_sparsity": 0.11167050958741236, "compression/movement_sparsity/model_sparsity": 0.10783428404441484, "compression_loss": 35.38974380493164, "distillation_loss": 0.8861311674118042, "epoch": 1.13, "learning_rate": 4.1963530974519986e-05, "loss": 36.0064, "step": 1331, "task_loss": 1.3413630723953247 }, { "compression/movement_sparsity/importance_regularization_factor": 0.330078125, "compression/movement_sparsity/importance_threshold": -0.0030701659907208523, "compression/movement_sparsity/linear_layer_sparsity": 0.11339704559191187, "compression/movement_sparsity/model_sparsity": 0.10950150822571378, "compression_loss": 35.59859848022461, "distillation_loss": 0.4705164432525635, "epoch": 1.13, "learning_rate": 4.195749305639416e-05, "loss": 36.2974, "step": 1332, "task_loss": 1.2239863872528076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33201618129436594, "compression/movement_sparsity/importance_threshold": -0.0030612841274094554, "compression/movement_sparsity/linear_layer_sparsity": 0.11512946021105586, "compression/movement_sparsity/model_sparsity": 0.11117440907315944, "compression_loss": 35.807064056396484, "distillation_loss": 0.3259223699569702, "epoch": 1.13, "learning_rate": 4.195145513826833e-05, "loss": 36.2741, "step": 1333, "task_loss": 1.4819735288619995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3339504961684774, "compression/movement_sparsity/importance_threshold": -0.0030524194105469964, "compression/movement_sparsity/linear_layer_sparsity": 0.11696160856830622, "compression/movement_sparsity/model_sparsity": 0.11294361749799063, "compression_loss": 36.015098571777344, "distillation_loss": 0.3800840675830841, "epoch": 1.13, "learning_rate": 4.1945417220142494e-05, "loss": 36.4816, "step": 1334, "task_loss": 0.9156310558319092 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3358810732372326, "compression/movement_sparsity/importance_threshold": -0.0030435718235668615, "compression/movement_sparsity/linear_layer_sparsity": 0.11862398062675715, "compression/movement_sparsity/model_sparsity": 0.11454888196217915, "compression_loss": 36.222713470458984, "distillation_loss": 0.7434872388839722, "epoch": 1.13, "learning_rate": 4.193937930201667e-05, "loss": 36.7804, "step": 1335, "task_loss": 0.1640770435333252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3378079161155312, "compression/movement_sparsity/importance_threshold": -0.0030347413499024287, "compression/movement_sparsity/linear_layer_sparsity": 0.12011843655655986, "compression/movement_sparsity/model_sparsity": 0.11599199873330894, "compression_loss": 36.429935455322266, "distillation_loss": 0.5562115907669067, "epoch": 1.13, "learning_rate": 4.1933341383890835e-05, "loss": 36.9521, "step": 1336, "task_loss": 0.4118911027908325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3397310284182715, "compression/movement_sparsity/importance_threshold": -0.0030259279729870827, "compression/movement_sparsity/linear_layer_sparsity": 0.12197617417755677, "compression/movement_sparsity/model_sparsity": 0.11778591735195522, "compression_loss": 36.63679885864258, "distillation_loss": 0.5368242263793945, "epoch": 1.13, "learning_rate": 4.1927303465765e-05, "loss": 37.2596, "step": 1337, "task_loss": 0.8506963849067688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3416504137603522, "compression/movement_sparsity/importance_threshold": -0.0030171316762542073, "compression/movement_sparsity/linear_layer_sparsity": 0.12360804421519517, "compression/movement_sparsity/model_sparsity": 0.11936172763358131, "compression_loss": 36.84320068359375, "distillation_loss": 0.5745812654495239, "epoch": 1.13, "learning_rate": 4.1921265547639176e-05, "loss": 37.3585, "step": 1338, "task_loss": 1.6198886632919312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34356607575667253, "compression/movement_sparsity/importance_threshold": -0.003008352443137183, "compression/movement_sparsity/linear_layer_sparsity": 0.12519273032149844, "compression/movement_sparsity/model_sparsity": 0.1208919748970684, "compression_loss": 37.049259185791016, "distillation_loss": 0.9108892679214478, "epoch": 1.13, "learning_rate": 4.191522762951335e-05, "loss": 37.5975, "step": 1339, "task_loss": 1.2346673011779785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3454780180221311, "compression/movement_sparsity/importance_threshold": -0.002999590257069393, "compression/movement_sparsity/linear_layer_sparsity": 0.12675421199758236, "compression/movement_sparsity/model_sparsity": 0.12239981487389931, "compression_loss": 37.25482940673828, "distillation_loss": 0.6868091225624084, "epoch": 1.13, "learning_rate": 4.190918971138752e-05, "loss": 37.7536, "step": 1340, "task_loss": 1.4438891410827637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34738624417162633, "compression/movement_sparsity/importance_threshold": -0.002990845101484222, "compression/movement_sparsity/linear_layer_sparsity": 0.12833088506323434, "compression/movement_sparsity/model_sparsity": 0.12392232436933247, "compression_loss": 37.46003723144531, "distillation_loss": 0.36048561334609985, "epoch": 1.13, "learning_rate": 4.1903151793261685e-05, "loss": 38.0245, "step": 1341, "task_loss": 0.22680264711380005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3492907578200576, "compression/movement_sparsity/importance_threshold": -0.00298211695981505, "compression/movement_sparsity/linear_layer_sparsity": 0.13002502310426722, "compression/movement_sparsity/model_sparsity": 0.12555826355687763, "compression_loss": 37.66482925415039, "distillation_loss": 0.28527650237083435, "epoch": 1.13, "learning_rate": 4.189711387513586e-05, "loss": 38.3351, "step": 1342, "task_loss": 0.0643920823931694 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3511915625823233, "compression/movement_sparsity/importance_threshold": -0.002973405815495261, "compression/movement_sparsity/linear_layer_sparsity": 0.1317379178609913, "compression/movement_sparsity/model_sparsity": 0.12721231510922765, "compression_loss": 37.869205474853516, "distillation_loss": 0.5674700736999512, "epoch": 1.14, "learning_rate": 4.1891075957010026e-05, "loss": 38.3702, "step": 1343, "task_loss": 0.4557296931743622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35308866207332235, "compression/movement_sparsity/importance_threshold": -0.0029647116519582383, "compression/movement_sparsity/linear_layer_sparsity": 0.13351817224069037, "compression/movement_sparsity/model_sparsity": 0.12893141227428098, "compression_loss": 38.07318878173828, "distillation_loss": 0.2756575345993042, "epoch": 1.14, "learning_rate": 4.188503803888419e-05, "loss": 38.5649, "step": 1344, "task_loss": 0.5992498397827148 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3549820599079536, "compression/movement_sparsity/importance_threshold": -0.002956034452637363, "compression/movement_sparsity/linear_layer_sparsity": 0.13528188779987851, "compression/movement_sparsity/model_sparsity": 0.13063453877818731, "compression_loss": 38.27675247192383, "distillation_loss": 0.5001899003982544, "epoch": 1.14, "learning_rate": 4.187900012075837e-05, "loss": 38.6948, "step": 1345, "task_loss": 1.3466458320617676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35687175970111595, "compression/movement_sparsity/importance_threshold": -0.002947374200966018, "compression/movement_sparsity/linear_layer_sparsity": 0.13701584063664754, "compression/movement_sparsity/model_sparsity": 0.13230892500075045, "compression_loss": 38.479881286621094, "distillation_loss": 0.6121162176132202, "epoch": 1.14, "learning_rate": 4.1872962202632534e-05, "loss": 38.9971, "step": 1346, "task_loss": 0.5631737112998962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35875776506770773, "compression/movement_sparsity/importance_threshold": -0.002938730880377588, "compression/movement_sparsity/linear_layer_sparsity": 0.1388431120093348, "compression/movement_sparsity/model_sparsity": 0.1340734239804417, "compression_loss": 38.6826057434082, "distillation_loss": 0.5731421709060669, "epoch": 1.14, "learning_rate": 4.18669242845067e-05, "loss": 39.1661, "step": 1347, "task_loss": 0.5402436256408691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36064007962262845, "compression/movement_sparsity/importance_threshold": -0.0029301044743054528, "compression/movement_sparsity/linear_layer_sparsity": 0.14069958566856233, "compression/movement_sparsity/model_sparsity": 0.13586612205829374, "compression_loss": 38.88492202758789, "distillation_loss": 0.29889822006225586, "epoch": 1.14, "learning_rate": 4.1860886366380875e-05, "loss": 39.4424, "step": 1348, "task_loss": 0.6926339864730835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3625187069807764, "compression/movement_sparsity/importance_threshold": -0.0029214949661829966, "compression/movement_sparsity/linear_layer_sparsity": 0.14261705144524728, "compression/movement_sparsity/model_sparsity": 0.13771771698673488, "compression_loss": 39.08681106567383, "distillation_loss": 0.3019513487815857, "epoch": 1.14, "learning_rate": 4.185484844825505e-05, "loss": 39.5156, "step": 1349, "task_loss": 0.04918494075536728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36439365075705055, "compression/movement_sparsity/importance_threshold": -0.0029129023394436015, "compression/movement_sparsity/linear_layer_sparsity": 0.14455128260162825, "compression/movement_sparsity/model_sparsity": 0.13958550135250314, "compression_loss": 39.28828811645508, "distillation_loss": 0.8031651973724365, "epoch": 1.14, "learning_rate": 4.184881053012921e-05, "loss": 39.8931, "step": 1350, "task_loss": 0.6291388273239136 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36626491456634935, "compression/movement_sparsity/importance_threshold": -0.002904326577520652, "compression/movement_sparsity/linear_layer_sparsity": 0.1464842140237369, "compression/movement_sparsity/model_sparsity": 0.1414520306338698, "compression_loss": 39.489376068115234, "distillation_loss": 0.5299981832504272, "epoch": 1.14, "learning_rate": 4.1842772612003383e-05, "loss": 40.2387, "step": 1351, "task_loss": 1.3494641780853271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3681325020235722, "compression/movement_sparsity/importance_threshold": -0.0028957676638475278, "compression/movement_sparsity/linear_layer_sparsity": 0.14827671452359803, "compression/movement_sparsity/model_sparsity": 0.14318295322718413, "compression_loss": 39.690101623535156, "distillation_loss": 0.7169336676597595, "epoch": 1.14, "learning_rate": 4.183673469387756e-05, "loss": 40.5194, "step": 1352, "task_loss": 0.8853238821029663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3699964167436175, "compression/movement_sparsity/importance_threshold": -0.0028872255818576135, "compression/movement_sparsity/linear_layer_sparsity": 0.1500050987740811, "compression/movement_sparsity/model_sparsity": 0.14485196216153123, "compression_loss": 39.89042663574219, "distillation_loss": 0.6234084367752075, "epoch": 1.14, "learning_rate": 4.183069677575172e-05, "loss": 40.6448, "step": 1353, "task_loss": 0.9994140863418579 }, { "compression/movement_sparsity/importance_regularization_factor": 0.37185666234138415, "compression/movement_sparsity/importance_threshold": -0.002878700314984292, "compression/movement_sparsity/linear_layer_sparsity": 0.15201835138938494, "compression/movement_sparsity/model_sparsity": 0.1467960533560099, "compression_loss": 40.090362548828125, "distillation_loss": 0.7390552759170532, "epoch": 1.14, "learning_rate": 4.182465885762589e-05, "loss": 40.7546, "step": 1354, "task_loss": 0.5284013748168945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.37371324243177073, "compression/movement_sparsity/importance_threshold": -0.002870191846660945, "compression/movement_sparsity/linear_layer_sparsity": 0.15393010548977232, "compression/movement_sparsity/model_sparsity": 0.14864213282180547, "compression_loss": 40.289913177490234, "distillation_loss": 0.7019363045692444, "epoch": 1.15, "learning_rate": 4.1818620939500066e-05, "loss": 40.9734, "step": 1355, "task_loss": 0.22970399260520935 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3755661606296764, "compression/movement_sparsity/importance_threshold": -0.0028617001603209553, "compression/movement_sparsity/linear_layer_sparsity": 0.15569475113403605, "compression/movement_sparsity/model_sparsity": 0.15034615745950375, "compression_loss": 40.489078521728516, "distillation_loss": 0.8797866106033325, "epoch": 1.15, "learning_rate": 4.181258302137423e-05, "loss": 41.4394, "step": 1356, "task_loss": 0.6985245943069458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3774154205499999, "compression/movement_sparsity/importance_threshold": -0.0028532252393977057, "compression/movement_sparsity/linear_layer_sparsity": 0.15752655369042495, "compression/movement_sparsity/model_sparsity": 0.1521150319627969, "compression_loss": 40.68787384033203, "distillation_loss": 0.34558191895484924, "epoch": 1.15, "learning_rate": 4.18065451032484e-05, "loss": 41.1726, "step": 1357, "task_loss": 0.23322723805904388 }, { "compression/movement_sparsity/importance_regularization_factor": 0.37926102580763965, "compression/movement_sparsity/importance_threshold": -0.0028447670673245797, "compression/movement_sparsity/linear_layer_sparsity": 0.15951748426391446, "compression/movement_sparsity/model_sparsity": 0.1540375679462682, "compression_loss": 40.88625717163086, "distillation_loss": 1.2111414670944214, "epoch": 1.15, "learning_rate": 4.1800507185122574e-05, "loss": 41.6459, "step": 1358, "task_loss": 1.603690505027771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.381102980017495, "compression/movement_sparsity/importance_threshold": -0.002836325627534958, "compression/movement_sparsity/linear_layer_sparsity": 0.16141841122008846, "compression/movement_sparsity/model_sparsity": 0.1558731922135623, "compression_loss": 41.08426284790039, "distillation_loss": 0.7422274351119995, "epoch": 1.15, "learning_rate": 4.179446926699674e-05, "loss": 41.8025, "step": 1359, "task_loss": 0.4388522803783417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3829412867944644, "compression/movement_sparsity/importance_threshold": -0.0028279009034622247, "compression/movement_sparsity/linear_layer_sparsity": 0.16329621721521653, "compression/movement_sparsity/model_sparsity": 0.1576864897959508, "compression_loss": 41.28182601928711, "distillation_loss": 1.1381157636642456, "epoch": 1.15, "learning_rate": 4.178843134887091e-05, "loss": 42.1654, "step": 1360, "task_loss": 1.1123270988464355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3847759497534464, "compression/movement_sparsity/importance_threshold": -0.0028194928785397633, "compression/movement_sparsity/linear_layer_sparsity": 0.1651219145977759, "compression/movement_sparsity/model_sparsity": 0.15944946885691713, "compression_loss": 41.47904586791992, "distillation_loss": 0.9483464360237122, "epoch": 1.15, "learning_rate": 4.178239343074508e-05, "loss": 42.2871, "step": 1361, "task_loss": 1.0567549467086792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3866069725093405, "compression/movement_sparsity/importance_threshold": -0.002811101536200954, "compression/movement_sparsity/linear_layer_sparsity": 0.1667681532574155, "compression/movement_sparsity/model_sparsity": 0.16103915415417566, "compression_loss": 41.675880432128906, "distillation_loss": 0.938665509223938, "epoch": 1.15, "learning_rate": 4.177635551261925e-05, "loss": 42.6116, "step": 1362, "task_loss": 0.8323412537574768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.388434358677045, "compression/movement_sparsity/importance_threshold": -0.0028027268598791815, "compression/movement_sparsity/linear_layer_sparsity": 0.168551472148197, "compression/movement_sparsity/model_sparsity": 0.1627612105549282, "compression_loss": 41.87230682373047, "distillation_loss": 0.8942568302154541, "epoch": 1.15, "learning_rate": 4.177031759449342e-05, "loss": 42.7738, "step": 1363, "task_loss": 0.40260952711105347 }, { "compression/movement_sparsity/importance_regularization_factor": 0.39025811187145865, "compression/movement_sparsity/importance_threshold": -0.0027943688330078283, "compression/movement_sparsity/linear_layer_sparsity": 0.17023220742480716, "compression/movement_sparsity/model_sparsity": 0.1643842074042403, "compression_loss": 42.06834411621094, "distillation_loss": 1.562467336654663, "epoch": 1.15, "learning_rate": 4.176427967636759e-05, "loss": 43.0285, "step": 1364, "task_loss": 1.1153866052627563 }, { "compression/movement_sparsity/importance_regularization_factor": 0.39207823570748057, "compression/movement_sparsity/importance_threshold": -0.002786027439020276, "compression/movement_sparsity/linear_layer_sparsity": 0.17203423533460932, "compression/movement_sparsity/model_sparsity": 0.1661243301116545, "compression_loss": 42.264041900634766, "distillation_loss": 0.38737452030181885, "epoch": 1.15, "learning_rate": 4.1758241758241765e-05, "loss": 42.9378, "step": 1365, "task_loss": 1.1271378993988037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3938947338000094, "compression/movement_sparsity/importance_threshold": -0.0027777026613499075, "compression/movement_sparsity/linear_layer_sparsity": 0.1738251260718396, "compression/movement_sparsity/model_sparsity": 0.16785369824263657, "compression_loss": 42.45930862426758, "distillation_loss": 0.3895227313041687, "epoch": 1.15, "learning_rate": 4.1752203840115925e-05, "loss": 43.0773, "step": 1366, "task_loss": 0.8238438367843628 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3957076097639437, "compression/movement_sparsity/importance_threshold": -0.0027693944834301073, "compression/movement_sparsity/linear_layer_sparsity": 0.17541688320955093, "compression/movement_sparsity/model_sparsity": 0.16939077362584984, "compression_loss": 42.65418243408203, "distillation_loss": 0.5807669162750244, "epoch": 1.16, "learning_rate": 4.17461659219901e-05, "loss": 43.3322, "step": 1367, "task_loss": 0.6619335412979126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.39751686721418267, "compression/movement_sparsity/importance_threshold": -0.002761102888694255, "compression/movement_sparsity/linear_layer_sparsity": 0.17717597219169634, "compression/movement_sparsity/model_sparsity": 0.17108943248986788, "compression_loss": 42.84867858886719, "distillation_loss": 0.8476413488388062, "epoch": 1.16, "learning_rate": 4.174012800386427e-05, "loss": 43.7195, "step": 1368, "task_loss": 0.726919412612915 }, { "compression/movement_sparsity/importance_regularization_factor": 0.399322509765625, "compression/movement_sparsity/importance_threshold": -0.002752827860575735, "compression/movement_sparsity/linear_layer_sparsity": 0.17888957047431095, "compression/movement_sparsity/model_sparsity": 0.17274416339982973, "compression_loss": 43.04276657104492, "distillation_loss": 0.4989110827445984, "epoch": 1.16, "learning_rate": 4.173409008573844e-05, "loss": 43.8204, "step": 1369, "task_loss": 0.7006256580352783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4011245410331694, "compression/movement_sparsity/importance_threshold": -0.0027445693825079295, "compression/movement_sparsity/linear_layer_sparsity": 0.1806491721956647, "compression/movement_sparsity/model_sparsity": 0.17444331738888694, "compression_loss": 43.23649978637695, "distillation_loss": 0.5670492649078369, "epoch": 1.16, "learning_rate": 4.172805216761261e-05, "loss": 43.8371, "step": 1370, "task_loss": 0.8751498460769653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4029229646317146, "compression/movement_sparsity/importance_threshold": -0.0027363274379242223, "compression/movement_sparsity/linear_layer_sparsity": 0.18252699011496043, "compression/movement_sparsity/model_sparsity": 0.17625662648581122, "compression_loss": 43.42981719970703, "distillation_loss": 0.7420504093170166, "epoch": 1.16, "learning_rate": 4.172201424948678e-05, "loss": 44.0812, "step": 1371, "task_loss": 1.035954475402832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40471778417615933, "compression/movement_sparsity/importance_threshold": -0.0027281020102579963, "compression/movement_sparsity/linear_layer_sparsity": 0.18449025661953472, "compression/movement_sparsity/model_sparsity": 0.1781524487462392, "compression_loss": 43.62273025512695, "distillation_loss": 0.8169597387313843, "epoch": 1.16, "learning_rate": 4.171597633136095e-05, "loss": 44.3088, "step": 1372, "task_loss": 0.7141774892807007 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4065090032814027, "compression/movement_sparsity/importance_threshold": -0.0027198930829426316, "compression/movement_sparsity/linear_layer_sparsity": 0.18619239577705127, "compression/movement_sparsity/model_sparsity": 0.1797961141873025, "compression_loss": 43.815223693847656, "distillation_loss": 1.4957916736602783, "epoch": 1.16, "learning_rate": 4.1709938413235116e-05, "loss": 44.6993, "step": 1373, "task_loss": 1.3578258752822876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40829662556234336, "compression/movement_sparsity/importance_threshold": -0.0027117006394115125, "compression/movement_sparsity/linear_layer_sparsity": 0.18805498653427596, "compression/movement_sparsity/model_sparsity": 0.18159471922201717, "compression_loss": 44.00736618041992, "distillation_loss": 1.077641248703003, "epoch": 1.16, "learning_rate": 4.170390049510929e-05, "loss": 44.751, "step": 1374, "task_loss": 0.8648414015769958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4100806546338799, "compression/movement_sparsity/importance_threshold": -0.002703524663098023, "compression/movement_sparsity/linear_layer_sparsity": 0.18977732523176766, "compression/movement_sparsity/model_sparsity": 0.18325789028671643, "compression_loss": 44.19908905029297, "distillation_loss": 0.5616555213928223, "epoch": 1.16, "learning_rate": 4.169786257698346e-05, "loss": 44.7433, "step": 1375, "task_loss": 0.6486899256706238 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41186109411091154, "compression/movement_sparsity/importance_threshold": -0.0026953651374355425, "compression/movement_sparsity/linear_layer_sparsity": 0.19174476519501452, "compression/movement_sparsity/model_sparsity": 0.1851577426346725, "compression_loss": 44.3903694152832, "distillation_loss": 0.4598456621170044, "epoch": 1.16, "learning_rate": 4.1691824658857624e-05, "loss": 44.9598, "step": 1376, "task_loss": 1.262534737586975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4136379476083367, "compression/movement_sparsity/importance_threshold": -0.0026872220458574562, "compression/movement_sparsity/linear_layer_sparsity": 0.19372782581786435, "compression/movement_sparsity/model_sparsity": 0.18707267902451938, "compression_loss": 44.581260681152344, "distillation_loss": 0.7080291509628296, "epoch": 1.16, "learning_rate": 4.16857867407318e-05, "loss": 45.3542, "step": 1377, "task_loss": 0.907243013381958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41541121874105424, "compression/movement_sparsity/importance_threshold": -0.002679095371797147, "compression/movement_sparsity/linear_layer_sparsity": 0.19564710406802996, "compression/movement_sparsity/model_sparsity": 0.18892602416240128, "compression_loss": 44.77177429199219, "distillation_loss": 1.119140863418579, "epoch": 1.16, "learning_rate": 4.1679748822605965e-05, "loss": 45.7409, "step": 1378, "task_loss": 0.5252880454063416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41718091112396327, "compression/movement_sparsity/importance_threshold": -0.002670985098687995, "compression/movement_sparsity/linear_layer_sparsity": 0.19723188556767432, "compression/movement_sparsity/model_sparsity": 0.19045636354217474, "compression_loss": 44.96189880371094, "distillation_loss": 0.7602623105049133, "epoch": 1.17, "learning_rate": 4.167371090448014e-05, "loss": 45.6283, "step": 1379, "task_loss": 0.18579337000846863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4189470283719622, "compression/movement_sparsity/importance_threshold": -0.0026628912099633858, "compression/movement_sparsity/linear_layer_sparsity": 0.19899702010281112, "compression/movement_sparsity/model_sparsity": 0.19216086027584062, "compression_loss": 45.15161895751953, "distillation_loss": 0.6066277623176575, "epoch": 1.17, "learning_rate": 4.1667672986354306e-05, "loss": 45.8584, "step": 1380, "task_loss": 0.34916430711746216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4207095740999499, "compression/movement_sparsity/importance_threshold": -0.0026548136890567004, "compression/movement_sparsity/linear_layer_sparsity": 0.20080458082639632, "compression/movement_sparsity/model_sparsity": 0.19390632572786345, "compression_loss": 45.340972900390625, "distillation_loss": 1.1802936792373657, "epoch": 1.17, "learning_rate": 4.166163506822848e-05, "loss": 46.2154, "step": 1381, "task_loss": 1.6261680126190186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4224685519228254, "compression/movement_sparsity/importance_threshold": -0.0026467525194013215, "compression/movement_sparsity/linear_layer_sparsity": 0.20281157325369134, "compression/movement_sparsity/model_sparsity": 0.19584437179105, "compression_loss": 45.52993392944336, "distillation_loss": 0.8602122068405151, "epoch": 1.17, "learning_rate": 4.165559715010265e-05, "loss": 46.3358, "step": 1382, "task_loss": 0.365669846534729 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42422396545548735, "compression/movement_sparsity/importance_threshold": -0.002638707684430632, "compression/movement_sparsity/linear_layer_sparsity": 0.20468489576178833, "compression/movement_sparsity/model_sparsity": 0.19765333990797976, "compression_loss": 45.718509674072266, "distillation_loss": 0.721659779548645, "epoch": 1.17, "learning_rate": 4.1649559231976815e-05, "loss": 46.4388, "step": 1383, "task_loss": 1.4450441598892212 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42597581831283426, "compression/movement_sparsity/importance_threshold": -0.002630679167578016, "compression/movement_sparsity/linear_layer_sparsity": 0.20654894126746462, "compression/movement_sparsity/model_sparsity": 0.19945334971606132, "compression_loss": 45.90668487548828, "distillation_loss": 0.8733277916908264, "epoch": 1.17, "learning_rate": 4.164352131385099e-05, "loss": 46.4835, "step": 1384, "task_loss": 1.054774522781372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4277241141097655, "compression/movement_sparsity/importance_threshold": -0.0026226669522768534, "compression/movement_sparsity/linear_layer_sparsity": 0.20862998277577832, "compression/movement_sparsity/model_sparsity": 0.20146290104653197, "compression_loss": 46.09446716308594, "distillation_loss": 0.8542643785476685, "epoch": 1.17, "learning_rate": 4.1637483395725156e-05, "loss": 46.9222, "step": 1385, "task_loss": 1.3610093593597412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4294688564611795, "compression/movement_sparsity/importance_threshold": -0.0026146710219605287, "compression/movement_sparsity/linear_layer_sparsity": 0.21063102504342304, "compression/movement_sparsity/model_sparsity": 0.20339520135635705, "compression_loss": 46.28190994262695, "distillation_loss": 0.5731046795845032, "epoch": 1.17, "learning_rate": 4.163144547759932e-05, "loss": 47.1125, "step": 1386, "task_loss": 0.5190793871879578 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4312100489819751, "compression/movement_sparsity/importance_threshold": -0.0026066913600624243, "compression/movement_sparsity/linear_layer_sparsity": 0.21251053619452304, "compression/movement_sparsity/model_sparsity": 0.20521014551736416, "compression_loss": 46.46894073486328, "distillation_loss": 0.23534142971038818, "epoch": 1.17, "learning_rate": 4.16254075594735e-05, "loss": 47.2349, "step": 1387, "task_loss": 0.14914126694202423 }, { "compression/movement_sparsity/importance_regularization_factor": 0.432947695287051, "compression/movement_sparsity/importance_threshold": -0.0025987279500159235, "compression/movement_sparsity/linear_layer_sparsity": 0.21453810973515755, "compression/movement_sparsity/model_sparsity": 0.20716806566933207, "compression_loss": 46.655582427978516, "distillation_loss": 0.5035988092422485, "epoch": 1.17, "learning_rate": 4.1619369641347664e-05, "loss": 47.4697, "step": 1388, "task_loss": 1.0292022228240967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4346817989913063, "compression/movement_sparsity/importance_threshold": -0.002590780775254407, "compression/movement_sparsity/linear_layer_sparsity": 0.2164278756704244, "compression/movement_sparsity/model_sparsity": 0.2089929123311225, "compression_loss": 46.84183120727539, "distillation_loss": 0.40400779247283936, "epoch": 1.17, "learning_rate": 4.161333172322184e-05, "loss": 47.4812, "step": 1389, "task_loss": 0.6485611796379089 }, { "compression/movement_sparsity/importance_regularization_factor": 0.43641236370963965, "compression/movement_sparsity/importance_threshold": -0.0025828498192112586, "compression/movement_sparsity/linear_layer_sparsity": 0.21847029479976557, "compression/movement_sparsity/model_sparsity": 0.2109651680801546, "compression_loss": 47.0276985168457, "distillation_loss": 0.5797058343887329, "epoch": 1.17, "learning_rate": 4.1607293805096005e-05, "loss": 47.9426, "step": 1390, "task_loss": 0.4651699364185333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4381393930569498, "compression/movement_sparsity/importance_threshold": -0.0025749350653198615, "compression/movement_sparsity/linear_layer_sparsity": 0.2205433709640985, "compression/movement_sparsity/model_sparsity": 0.21296702770071452, "compression_loss": 47.21320343017578, "distillation_loss": 0.43159276247024536, "epoch": 1.18, "learning_rate": 4.160125588697017e-05, "loss": 47.8427, "step": 1391, "task_loss": 0.8173123598098755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.43986289064813533, "compression/movement_sparsity/importance_threshold": -0.0025670364970135987, "compression/movement_sparsity/linear_layer_sparsity": 0.22261848616109722, "compression/movement_sparsity/model_sparsity": 0.21497085630689527, "compression_loss": 47.3983154296875, "distillation_loss": 0.9494724869728088, "epoch": 1.18, "learning_rate": 4.1595217968844346e-05, "loss": 48.2696, "step": 1392, "task_loss": 0.8915983438491821 }, { "compression/movement_sparsity/importance_regularization_factor": 0.44158286009809544, "compression/movement_sparsity/importance_threshold": -0.0025591540977258514, "compression/movement_sparsity/linear_layer_sparsity": 0.22453118227072782, "compression/movement_sparsity/model_sparsity": 0.2168178454210186, "compression_loss": 47.583011627197266, "distillation_loss": 0.5570276975631714, "epoch": 1.18, "learning_rate": 4.1589180050718514e-05, "loss": 48.4539, "step": 1393, "task_loss": 0.3232381045818329 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4432993050217289, "compression/movement_sparsity/importance_threshold": -0.0025512878508900023, "compression/movement_sparsity/linear_layer_sparsity": 0.22644002687421205, "compression/movement_sparsity/model_sparsity": 0.21866111534008026, "compression_loss": 47.76732635498047, "distillation_loss": 0.6672165393829346, "epoch": 1.18, "learning_rate": 4.158314213259268e-05, "loss": 48.6178, "step": 1394, "task_loss": 0.8677528500556946 }, { "compression/movement_sparsity/importance_regularization_factor": 0.445012229033934, "compression/movement_sparsity/importance_threshold": -0.002543437739939436, "compression/movement_sparsity/linear_layer_sparsity": 0.22851664451633286, "compression/movement_sparsity/model_sparsity": 0.22066639477777114, "compression_loss": 47.951297760009766, "distillation_loss": 0.8087161183357239, "epoch": 1.18, "learning_rate": 4.1577104214466855e-05, "loss": 48.7279, "step": 1395, "task_loss": 0.6771552562713623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4467216357496101, "compression/movement_sparsity/importance_threshold": -0.0025356037483075335, "compression/movement_sparsity/linear_layer_sparsity": 0.23043451571471743, "compression/movement_sparsity/model_sparsity": 0.22251838120042927, "compression_loss": 48.13488006591797, "distillation_loss": 1.235686182975769, "epoch": 1.18, "learning_rate": 4.157106629634102e-05, "loss": 49.3076, "step": 1396, "task_loss": 0.9543386101722717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4484275287836559, "compression/movement_sparsity/importance_threshold": -0.0025277858594276773, "compression/movement_sparsity/linear_layer_sparsity": 0.23240395893812713, "compression/movement_sparsity/model_sparsity": 0.22442016799039882, "compression_loss": 48.31809616088867, "distillation_loss": 0.7839653491973877, "epoch": 1.18, "learning_rate": 4.1565028378215196e-05, "loss": 49.144, "step": 1397, "task_loss": 1.2677866220474243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4501299117509697, "compression/movement_sparsity/importance_threshold": -0.002519984056733252, "compression/movement_sparsity/linear_layer_sparsity": 0.23425888245556198, "compression/movement_sparsity/model_sparsity": 0.22621136917859758, "compression_loss": 48.5009651184082, "distillation_loss": 0.6091524958610535, "epoch": 1.18, "learning_rate": 4.155899046008936e-05, "loss": 49.3116, "step": 1398, "task_loss": 0.46659186482429504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4518287882664511, "compression/movement_sparsity/importance_threshold": -0.0025121983236576374, "compression/movement_sparsity/linear_layer_sparsity": 0.23609080425362725, "compression/movement_sparsity/model_sparsity": 0.22798035882724868, "compression_loss": 48.68345260620117, "distillation_loss": 1.7487878799438477, "epoch": 1.18, "learning_rate": 4.155295254196354e-05, "loss": 50.0042, "step": 1399, "task_loss": 1.2695006132125854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45352416194499834, "compression/movement_sparsity/importance_threshold": -0.002504428643634218, "compression/movement_sparsity/linear_layer_sparsity": 0.23824822005564866, "compression/movement_sparsity/model_sparsity": 0.23006366075948334, "compression_loss": 48.86553955078125, "distillation_loss": 1.0289828777313232, "epoch": 1.18, "learning_rate": 4.1546914623837704e-05, "loss": 49.8366, "step": 1400, "task_loss": 1.0021551847457886 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4552160364015102, "compression/movement_sparsity/importance_threshold": -0.002496675000096377, "compression/movement_sparsity/linear_layer_sparsity": 0.24008929961445827, "compression/movement_sparsity/model_sparsity": 0.23184149357162465, "compression_loss": 49.0472412109375, "distillation_loss": 0.9225964546203613, "epoch": 1.18, "learning_rate": 4.154087670571187e-05, "loss": 49.8585, "step": 1401, "task_loss": 0.6742496490478516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4569044152508859, "compression/movement_sparsity/importance_threshold": -0.002488937376477495, "compression/movement_sparsity/linear_layer_sparsity": 0.24200739737202792, "compression/movement_sparsity/model_sparsity": 0.2336936987704629, "compression_loss": 49.22858810424805, "distillation_loss": 1.4452874660491943, "epoch": 1.19, "learning_rate": 4.1534838787586045e-05, "loss": 50.21, "step": 1402, "task_loss": 1.4102370738983154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45858930210802396, "compression/movement_sparsity/importance_threshold": -0.0024812157562109564, "compression/movement_sparsity/linear_layer_sparsity": 0.2440696702404828, "compression/movement_sparsity/model_sparsity": 0.23568512622159293, "compression_loss": 49.40953826904297, "distillation_loss": 0.8179798722267151, "epoch": 1.19, "learning_rate": 4.152880086946021e-05, "loss": 50.1999, "step": 1403, "task_loss": 1.8175069093704224 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46027070058782293, "compression/movement_sparsity/importance_threshold": -0.0024735101227301437, "compression/movement_sparsity/linear_layer_sparsity": 0.24608199277071105, "compression/movement_sparsity/model_sparsity": 0.23762831928227965, "compression_loss": 49.5900764465332, "distillation_loss": 0.8096897602081299, "epoch": 1.19, "learning_rate": 4.152276295133438e-05, "loss": 50.3873, "step": 1404, "task_loss": 0.722550630569458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4619486143051821, "compression/movement_sparsity/importance_threshold": -0.002465820459468439, "compression/movement_sparsity/linear_layer_sparsity": 0.24816018440295978, "compression/movement_sparsity/model_sparsity": 0.23963511863869538, "compression_loss": 49.77024459838867, "distillation_loss": 0.8878952860832214, "epoch": 1.19, "learning_rate": 4.1516725033208554e-05, "loss": 50.7337, "step": 1405, "task_loss": 0.629235565662384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.463623046875, "compression/movement_sparsity/importance_threshold": -0.0024581467498592247, "compression/movement_sparsity/linear_layer_sparsity": 0.25001024285999923, "compression/movement_sparsity/model_sparsity": 0.24142162189629, "compression_loss": 49.95001220703125, "distillation_loss": 0.6177130937576294, "epoch": 1.19, "learning_rate": 4.151068711508272e-05, "loss": 50.8943, "step": 1406, "task_loss": 1.1544272899627686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4652940019121755, "compression/movement_sparsity/importance_threshold": -0.002450488977335884, "compression/movement_sparsity/linear_layer_sparsity": 0.25180075202386515, "compression/movement_sparsity/model_sparsity": 0.24315062156212663, "compression_loss": 50.12944412231445, "distillation_loss": 0.8338093161582947, "epoch": 1.19, "learning_rate": 4.150464919695689e-05, "loss": 50.9173, "step": 1407, "task_loss": 1.0770759582519531 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46696148303160734, "compression/movement_sparsity/importance_threshold": -0.0024428471253317994, "compression/movement_sparsity/linear_layer_sparsity": 0.25356757979080624, "compression/movement_sparsity/model_sparsity": 0.24485675335987533, "compression_loss": 50.308441162109375, "distillation_loss": 0.5739105939865112, "epoch": 1.19, "learning_rate": 4.149861127883106e-05, "loss": 51.3602, "step": 1408, "task_loss": 1.3779425621032715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4686254938481941, "compression/movement_sparsity/importance_threshold": -0.002435221177280355, "compression/movement_sparsity/linear_layer_sparsity": 0.2556490982658254, "compression/movement_sparsity/model_sparsity": 0.24686676527177775, "compression_loss": 50.487125396728516, "distillation_loss": 0.8901494741439819, "epoch": 1.19, "learning_rate": 4.1492573360705236e-05, "loss": 51.3715, "step": 1409, "task_loss": 1.1198927164077759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4702860379768351, "compression/movement_sparsity/importance_threshold": -0.0024276111166149305, "compression/movement_sparsity/linear_layer_sparsity": 0.2575920102162453, "compression/movement_sparsity/model_sparsity": 0.24874293221960445, "compression_loss": 50.6654167175293, "distillation_loss": 0.648289680480957, "epoch": 1.19, "learning_rate": 4.1486535442579396e-05, "loss": 51.6902, "step": 1410, "task_loss": 1.282023310661316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4719431190324288, "compression/movement_sparsity/importance_threshold": -0.002420016926768911, "compression/movement_sparsity/linear_layer_sparsity": 0.2596068248975094, "compression/movement_sparsity/model_sparsity": 0.2506885318182722, "compression_loss": 50.843360900878906, "distillation_loss": 0.8428126573562622, "epoch": 1.19, "learning_rate": 4.148049752445357e-05, "loss": 51.7478, "step": 1411, "task_loss": 1.4244431257247925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4735967406298738, "compression/movement_sparsity/importance_threshold": -0.002412438591175679, "compression/movement_sparsity/linear_layer_sparsity": 0.26166181209953876, "compression/movement_sparsity/model_sparsity": 0.25267292388803175, "compression_loss": 51.02088165283203, "distillation_loss": 0.7793769240379333, "epoch": 1.19, "learning_rate": 4.1474459606327744e-05, "loss": 52.1259, "step": 1412, "task_loss": 1.5170975923538208 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4752469063840694, "compression/movement_sparsity/importance_threshold": -0.0024048760932686153, "compression/movement_sparsity/linear_layer_sparsity": 0.2636163143409007, "compression/movement_sparsity/model_sparsity": 0.2545602829646508, "compression_loss": 51.19805908203125, "distillation_loss": 0.8450014591217041, "epoch": 1.19, "learning_rate": 4.146842168820191e-05, "loss": 51.9837, "step": 1413, "task_loss": 0.9495001435279846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4768936199099141, "compression/movement_sparsity/importance_threshold": -0.0023973294164811045, "compression/movement_sparsity/linear_layer_sparsity": 0.2656318086997201, "compression/movement_sparsity/model_sparsity": 0.2565065388918588, "compression_loss": 51.37485122680664, "distillation_loss": 0.7152976989746094, "epoch": 1.2, "learning_rate": 4.146238377007608e-05, "loss": 52.0954, "step": 1414, "task_loss": 0.5051455497741699 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4785368848223065, "compression/movement_sparsity/importance_threshold": -0.002389798544246529, "compression/movement_sparsity/linear_layer_sparsity": 0.26774956472018463, "compression/movement_sparsity/model_sparsity": 0.2585515434780408, "compression_loss": 51.551265716552734, "distillation_loss": 0.9359161257743835, "epoch": 1.2, "learning_rate": 4.145634585195025e-05, "loss": 52.5079, "step": 1415, "task_loss": 1.0214064121246338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48017670473614593, "compression/movement_sparsity/importance_threshold": -0.00238228345999827, "compression/movement_sparsity/linear_layer_sparsity": 0.269755889394092, "compression/movement_sparsity/model_sparsity": 0.2604889447272229, "compression_loss": 51.727317810058594, "distillation_loss": 0.8074865341186523, "epoch": 1.2, "learning_rate": 4.145030793382442e-05, "loss": 52.7506, "step": 1416, "task_loss": 0.68890780210495 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4818130832663309, "compression/movement_sparsity/importance_threshold": -0.0023747841471697113, "compression/movement_sparsity/linear_layer_sparsity": 0.2718171129357949, "compression/movement_sparsity/model_sparsity": 0.262479358899203, "compression_loss": 51.90298080444336, "distillation_loss": 1.3543808460235596, "epoch": 1.2, "learning_rate": 4.144427001569859e-05, "loss": 52.8076, "step": 1417, "task_loss": 1.6557782888412476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4834460240277598, "compression/movement_sparsity/importance_threshold": -0.002367300589194237, "compression/movement_sparsity/linear_layer_sparsity": 0.2739106151992881, "compression/movement_sparsity/model_sparsity": 0.26450094291957893, "compression_loss": 52.0782585144043, "distillation_loss": 0.9128684997558594, "epoch": 1.2, "learning_rate": 4.143823209757276e-05, "loss": 52.969, "step": 1418, "task_loss": 1.0251028537750244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48507553063533226, "compression/movement_sparsity/importance_threshold": -0.0023598327695052265, "compression/movement_sparsity/linear_layer_sparsity": 0.27582851024600796, "compression/movement_sparsity/model_sparsity": 0.2663529523713087, "compression_loss": 52.253177642822266, "distillation_loss": 0.6355591416358948, "epoch": 1.2, "learning_rate": 4.143219417944693e-05, "loss": 53.1173, "step": 1419, "task_loss": 0.4957306981086731 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48670160670394647, "compression/movement_sparsity/importance_threshold": -0.0023523806715360653, "compression/movement_sparsity/linear_layer_sparsity": 0.2779702338434206, "compression/movement_sparsity/model_sparsity": 0.26842110117443774, "compression_loss": 52.42771911621094, "distillation_loss": 1.8318843841552734, "epoch": 1.2, "learning_rate": 4.1426156261321095e-05, "loss": 53.615, "step": 1420, "task_loss": 1.216367244720459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48832425584850125, "compression/movement_sparsity/importance_threshold": -0.002344944278720135, "compression/movement_sparsity/linear_layer_sparsity": 0.27986445941738325, "compression/movement_sparsity/model_sparsity": 0.27025025427261534, "compression_loss": 52.60190963745117, "distillation_loss": 0.7731907367706299, "epoch": 1.2, "learning_rate": 4.142011834319527e-05, "loss": 53.4765, "step": 1421, "task_loss": 0.593701183795929 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4899434816838958, "compression/movement_sparsity/importance_threshold": -0.0023375235744908175, "compression/movement_sparsity/linear_layer_sparsity": 0.2817269667054345, "compression/movement_sparsity/model_sparsity": 0.2720487787055794, "compression_loss": 52.775699615478516, "distillation_loss": 1.5682404041290283, "epoch": 1.2, "learning_rate": 4.141408042506944e-05, "loss": 53.8405, "step": 1422, "task_loss": 1.4268540143966675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49155928782502856, "compression/movement_sparsity/importance_threshold": -0.0023301185422814966, "compression/movement_sparsity/linear_layer_sparsity": 0.2838436376266442, "compression/movement_sparsity/model_sparsity": 0.27409273546900415, "compression_loss": 52.94915008544922, "distillation_loss": 0.5272500514984131, "epoch": 1.2, "learning_rate": 4.1408042506943604e-05, "loss": 53.6855, "step": 1423, "task_loss": 0.545565664768219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4931716778867985, "compression/movement_sparsity/importance_threshold": -0.002322729165525554, "compression/movement_sparsity/linear_layer_sparsity": 0.28591085902466795, "compression/movement_sparsity/model_sparsity": 0.2760889414524889, "compression_loss": 53.122215270996094, "distillation_loss": 1.4333659410476685, "epoch": 1.2, "learning_rate": 4.140200458881778e-05, "loss": 54.1605, "step": 1424, "task_loss": 1.0088248252868652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4947806554841041, "compression/movement_sparsity/importance_threshold": -0.002315355427656374, "compression/movement_sparsity/linear_layer_sparsity": 0.28784207336663703, "compression/movement_sparsity/model_sparsity": 0.2779538126407011, "compression_loss": 53.294891357421875, "distillation_loss": 1.0239853858947754, "epoch": 1.2, "learning_rate": 4.139596667069195e-05, "loss": 54.1335, "step": 1425, "task_loss": 0.7140935063362122 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49638622423184464, "compression/movement_sparsity/importance_threshold": -0.002307997312107338, "compression/movement_sparsity/linear_layer_sparsity": 0.2900421896129625, "compression/movement_sparsity/model_sparsity": 0.2800783481256161, "compression_loss": 53.46720886230469, "distillation_loss": 0.5709481239318848, "epoch": 1.21, "learning_rate": 4.138992875256611e-05, "loss": 54.3082, "step": 1426, "task_loss": 0.7841566801071167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4979883877449186, "compression/movement_sparsity/importance_threshold": -0.002300654802311828, "compression/movement_sparsity/linear_layer_sparsity": 0.292057648199279, "compression/movement_sparsity/model_sparsity": 0.2820245695092167, "compression_loss": 53.63915252685547, "distillation_loss": 1.245058536529541, "epoch": 1.21, "learning_rate": 4.1383890834440286e-05, "loss": 54.589, "step": 1427, "task_loss": 0.9732547402381897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4995871496382249, "compression/movement_sparsity/importance_threshold": -0.002293327881703228, "compression/movement_sparsity/linear_layer_sparsity": 0.29416776082828894, "compression/movement_sparsity/model_sparsity": 0.28406219327795446, "compression_loss": 53.810726165771484, "distillation_loss": 1.279276967048645, "epoch": 1.21, "learning_rate": 4.137785291631446e-05, "loss": 54.7984, "step": 1428, "task_loss": 0.8782503008842468 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5011825135266621, "compression/movement_sparsity/importance_threshold": -0.0022860165337149213, "compression/movement_sparsity/linear_layer_sparsity": 0.29640304144497254, "compression/movement_sparsity/model_sparsity": 0.2862206851289275, "compression_loss": 53.9819221496582, "distillation_loss": 1.1025187969207764, "epoch": 1.21, "learning_rate": 4.137181499818863e-05, "loss": 54.9574, "step": 1429, "task_loss": 0.660609245300293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5027744830251293, "compression/movement_sparsity/importance_threshold": -0.002278720741780289, "compression/movement_sparsity/linear_layer_sparsity": 0.2984352415626498, "compression/movement_sparsity/model_sparsity": 0.2881830729207837, "compression_loss": 54.15276336669922, "distillation_loss": 1.4247691631317139, "epoch": 1.21, "learning_rate": 4.1365777080062794e-05, "loss": 55.2051, "step": 1430, "task_loss": 0.7182941436767578 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5043630617485253, "compression/movement_sparsity/importance_threshold": -0.0022714404893327132, "compression/movement_sparsity/linear_layer_sparsity": 0.3004465386144613, "compression/movement_sparsity/model_sparsity": 0.2901252757313921, "compression_loss": 54.323265075683594, "distillation_loss": 0.9132450819015503, "epoch": 1.21, "learning_rate": 4.135973916193697e-05, "loss": 55.1953, "step": 1431, "task_loss": 0.48651307821273804 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5059482533117485, "compression/movement_sparsity/importance_threshold": -0.002264175759805579, "compression/movement_sparsity/linear_layer_sparsity": 0.302332643830264, "compression/movement_sparsity/model_sparsity": 0.29194658743069357, "compression_loss": 54.49338150024414, "distillation_loss": 1.1625076532363892, "epoch": 1.21, "learning_rate": 4.1353701243811135e-05, "loss": 55.6823, "step": 1432, "task_loss": 1.5855516195297241 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5075300613296982, "compression/movement_sparsity/importance_threshold": -0.0022569265366322666, "compression/movement_sparsity/linear_layer_sparsity": 0.30426878285346665, "compression/movement_sparsity/model_sparsity": 0.29381621412218895, "compression_loss": 54.663143157958984, "distillation_loss": 1.6091574430465698, "epoch": 1.21, "learning_rate": 4.13476633256853e-05, "loss": 55.6279, "step": 1433, "task_loss": 1.5340192317962646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.509108489417273, "compression/movement_sparsity/importance_threshold": -0.00224969280324616, "compression/movement_sparsity/linear_layer_sparsity": 0.3059428167478655, "compression/movement_sparsity/model_sparsity": 0.29543273980238455, "compression_loss": 54.832557678222656, "distillation_loss": 0.7424619793891907, "epoch": 1.21, "learning_rate": 4.1341625407559477e-05, "loss": 55.908, "step": 1434, "task_loss": 0.49996769428253174 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5106835411893714, "compression/movement_sparsity/importance_threshold": -0.002242474543080643, "compression/movement_sparsity/linear_layer_sparsity": 0.307753346589192, "compression/movement_sparsity/model_sparsity": 0.29718107237382024, "compression_loss": 55.001625061035156, "distillation_loss": 1.0491019487380981, "epoch": 1.21, "learning_rate": 4.1335587489433644e-05, "loss": 55.9898, "step": 1435, "task_loss": 0.5780500769615173 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5122552202608928, "compression/movement_sparsity/importance_threshold": -0.0022352717395690952, "compression/movement_sparsity/linear_layer_sparsity": 0.3098265896918719, "compression/movement_sparsity/model_sparsity": 0.2991830931978812, "compression_loss": 55.1702880859375, "distillation_loss": 1.4050453901290894, "epoch": 1.21, "learning_rate": 4.132954957130781e-05, "loss": 56.17, "step": 1436, "task_loss": 0.7491946816444397 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5138235302467355, "compression/movement_sparsity/importance_threshold": -0.0022280843761449012, "compression/movement_sparsity/linear_layer_sparsity": 0.3116955598787818, "compression/movement_sparsity/model_sparsity": 0.300987858509246, "compression_loss": 55.33863067626953, "distillation_loss": 1.4136513471603394, "epoch": 1.21, "learning_rate": 4.1323511653181985e-05, "loss": 56.3738, "step": 1437, "task_loss": 0.9430605173110962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5153884747617983, "compression/movement_sparsity/importance_threshold": -0.002220912436241445, "compression/movement_sparsity/linear_layer_sparsity": 0.3136789186058225, "compression/movement_sparsity/model_sparsity": 0.30290308276248773, "compression_loss": 55.5065803527832, "distillation_loss": 1.463271975517273, "epoch": 1.22, "learning_rate": 4.131747373505616e-05, "loss": 56.8489, "step": 1438, "task_loss": 1.9897266626358032 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5169500574209804, "compression/movement_sparsity/importance_threshold": -0.002213755903292106, "compression/movement_sparsity/linear_layer_sparsity": 0.31561320938304166, "compression/movement_sparsity/model_sparsity": 0.304770924700935, "compression_loss": 55.67420196533203, "distillation_loss": 1.3778387308120728, "epoch": 1.22, "learning_rate": 4.131143581693032e-05, "loss": 56.971, "step": 1439, "task_loss": 1.1962980031967163 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5185082818391804, "compression/movement_sparsity/importance_threshold": -0.0022066147607302687, "compression/movement_sparsity/linear_layer_sparsity": 0.3175832726631684, "compression/movement_sparsity/model_sparsity": 0.3066733102467658, "compression_loss": 55.841453552246094, "distillation_loss": 0.6576246619224548, "epoch": 1.22, "learning_rate": 4.130539789880449e-05, "loss": 56.622, "step": 1440, "task_loss": 0.14681243896484375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5200631516312967, "compression/movement_sparsity/importance_threshold": -0.0021994889919893174, "compression/movement_sparsity/linear_layer_sparsity": 0.31941934407157097, "compression/movement_sparsity/model_sparsity": 0.30844630695387343, "compression_loss": 56.00831604003906, "distillation_loss": 1.684802770614624, "epoch": 1.22, "learning_rate": 4.129935998067867e-05, "loss": 57.3767, "step": 1441, "task_loss": 0.743128776550293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5216146704122288, "compression/movement_sparsity/importance_threshold": -0.0021923785805026312, "compression/movement_sparsity/linear_layer_sparsity": 0.32138649785479456, "compression/movement_sparsity/model_sparsity": 0.3103458829529704, "compression_loss": 56.17481994628906, "distillation_loss": 0.98088538646698, "epoch": 1.22, "learning_rate": 4.1293322062552834e-05, "loss": 57.1654, "step": 1442, "task_loss": 1.3558443784713745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.523162841796875, "compression/movement_sparsity/importance_threshold": -0.002185283509703595, "compression/movement_sparsity/linear_layer_sparsity": 0.3235105736841061, "compression/movement_sparsity/model_sparsity": 0.31239699024312356, "compression_loss": 56.3409423828125, "distillation_loss": 0.4254019260406494, "epoch": 1.22, "learning_rate": 4.1287284144427e-05, "loss": 57.2492, "step": 1443, "task_loss": 0.7811148166656494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5247076694001342, "compression/movement_sparsity/importance_threshold": -0.002178203763025591, "compression/movement_sparsity/linear_layer_sparsity": 0.32531586881584046, "compression/movement_sparsity/model_sparsity": 0.31414026793334543, "compression_loss": 56.50667953491211, "distillation_loss": 1.018589973449707, "epoch": 1.22, "learning_rate": 4.1281246226301175e-05, "loss": 57.5268, "step": 1444, "task_loss": 0.6681316494941711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5262491568369054, "compression/movement_sparsity/importance_threshold": -0.002171139323902002, "compression/movement_sparsity/linear_layer_sparsity": 0.32758713657044913, "compression/movement_sparsity/model_sparsity": 0.3163335106533464, "compression_loss": 56.67206573486328, "distillation_loss": 1.6108555793762207, "epoch": 1.22, "learning_rate": 4.127520830817534e-05, "loss": 57.965, "step": 1445, "task_loss": 1.1131993532180786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.527787307722087, "compression/movement_sparsity/importance_threshold": -0.002164090175766211, "compression/movement_sparsity/linear_layer_sparsity": 0.32956537982957407, "compression/movement_sparsity/model_sparsity": 0.3182437951707323, "compression_loss": 56.837120056152344, "distillation_loss": 1.2252107858657837, "epoch": 1.22, "learning_rate": 4.126917039004951e-05, "loss": 57.9877, "step": 1446, "task_loss": 1.0145320892333984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5293221256705782, "compression/movement_sparsity/importance_threshold": -0.0021570563020515995, "compression/movement_sparsity/linear_layer_sparsity": 0.33163272046927417, "compression/movement_sparsity/model_sparsity": 0.320240116299575, "compression_loss": 57.00176239013672, "distillation_loss": 1.4625208377838135, "epoch": 1.22, "learning_rate": 4.1263132471923684e-05, "loss": 58.5722, "step": 1447, "task_loss": 1.2029496431350708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5308536142972777, "compression/movement_sparsity/importance_threshold": -0.002150037686191551, "compression/movement_sparsity/linear_layer_sparsity": 0.3338054588267077, "compression/movement_sparsity/model_sparsity": 0.3223382144103057, "compression_loss": 57.16609573364258, "distillation_loss": 1.5080997943878174, "epoch": 1.22, "learning_rate": 4.125709455379785e-05, "loss": 58.1502, "step": 1448, "task_loss": 1.0628570318222046 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5323817772170839, "compression/movement_sparsity/importance_threshold": -0.0021430343116194493, "compression/movement_sparsity/linear_layer_sparsity": 0.3358129162965405, "compression/movement_sparsity/model_sparsity": 0.32427670954038823, "compression_loss": 57.33007049560547, "distillation_loss": 1.2551956176757812, "epoch": 1.22, "learning_rate": 4.125105663567202e-05, "loss": 58.6316, "step": 1449, "task_loss": 0.4885764420032501 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5339066180448961, "compression/movement_sparsity/importance_threshold": -0.002136046161768675, "compression/movement_sparsity/linear_layer_sparsity": 0.3378256204001331, "compression/movement_sparsity/model_sparsity": 0.3262202710662203, "compression_loss": 57.49367904663086, "distillation_loss": 1.583085298538208, "epoch": 1.23, "learning_rate": 4.124501871754619e-05, "loss": 58.8236, "step": 1450, "task_loss": 1.164199709892273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.535428140395613, "compression/movement_sparsity/importance_threshold": -0.002129073220072611, "compression/movement_sparsity/linear_layer_sparsity": 0.33972964763989244, "compression/movement_sparsity/model_sparsity": 0.328058889112821, "compression_loss": 57.65696716308594, "distillation_loss": 1.384728193283081, "epoch": 1.23, "learning_rate": 4.123898079942036e-05, "loss": 59.0798, "step": 1451, "task_loss": 1.6437386274337769 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5369463478841332, "compression/movement_sparsity/importance_threshold": -0.002122115469964642, "compression/movement_sparsity/linear_layer_sparsity": 0.34159006819860743, "compression/movement_sparsity/model_sparsity": 0.32985539850202106, "compression_loss": 57.81986999511719, "distillation_loss": 1.348381757736206, "epoch": 1.23, "learning_rate": 4.123294288129453e-05, "loss": 59.1949, "step": 1452, "task_loss": 2.407715320587158 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5384612441253557, "compression/movement_sparsity/importance_threshold": -0.0021151728948781484, "compression/movement_sparsity/linear_layer_sparsity": 0.3436779422549765, "compression/movement_sparsity/model_sparsity": 0.331871547661502, "compression_loss": 57.98245620727539, "distillation_loss": 0.8128530979156494, "epoch": 1.23, "learning_rate": 4.12269049631687e-05, "loss": 59.0839, "step": 1453, "task_loss": 0.8020306825637817 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5399728327341792, "compression/movement_sparsity/importance_threshold": -0.0021082454782465135, "compression/movement_sparsity/linear_layer_sparsity": 0.34560945470113646, "compression/movement_sparsity/model_sparsity": 0.33373670671310907, "compression_loss": 58.144683837890625, "distillation_loss": 0.844206690788269, "epoch": 1.23, "learning_rate": 4.1220867045042874e-05, "loss": 59.1182, "step": 1454, "task_loss": 0.8747289776802063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5414811173255023, "compression/movement_sparsity/importance_threshold": -0.002101333203503121, "compression/movement_sparsity/linear_layer_sparsity": 0.347533550315027, "compression/movement_sparsity/model_sparsity": 0.33559470372345196, "compression_loss": 58.306549072265625, "distillation_loss": 3.0557281970977783, "epoch": 1.23, "learning_rate": 4.121482912691704e-05, "loss": 60.1992, "step": 1455, "task_loss": 1.803056240081787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5429861015142242, "compression/movement_sparsity/importance_threshold": -0.002094436054081352, "compression/movement_sparsity/linear_layer_sparsity": 0.3495387898896795, "compression/movement_sparsity/model_sparsity": 0.33753105714987675, "compression_loss": 58.46807098388672, "distillation_loss": 1.0178608894348145, "epoch": 1.23, "learning_rate": 4.120879120879121e-05, "loss": 60.145, "step": 1456, "task_loss": 0.7710824608802795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5444877889152435, "compression/movement_sparsity/importance_threshold": -0.0020875540134145903, "compression/movement_sparsity/linear_layer_sparsity": 0.3514422686177276, "compression/movement_sparsity/model_sparsity": 0.3393691455278309, "compression_loss": 58.62922668457031, "distillation_loss": 1.576033592224121, "epoch": 1.23, "learning_rate": 4.120275329066538e-05, "loss": 59.8874, "step": 1457, "task_loss": 0.7162710428237915 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5459861831434588, "compression/movement_sparsity/importance_threshold": -0.0020806870649362186, "compression/movement_sparsity/linear_layer_sparsity": 0.35342391026462877, "compression/movement_sparsity/model_sparsity": 0.34128271168791824, "compression_loss": 58.7900390625, "distillation_loss": 1.9783668518066406, "epoch": 1.23, "learning_rate": 4.119671537253955e-05, "loss": 60.1444, "step": 1458, "task_loss": 1.8920761346817017 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5474812878137694, "compression/movement_sparsity/importance_threshold": -0.0020738351920796183, "compression/movement_sparsity/linear_layer_sparsity": 0.3552746007025529, "compression/movement_sparsity/model_sparsity": 0.34306982521590995, "compression_loss": 58.95048141479492, "distillation_loss": 1.4378045797348022, "epoch": 1.23, "learning_rate": 4.119067745441372e-05, "loss": 60.4032, "step": 1459, "task_loss": 0.9839824438095093 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5489731065410736, "compression/movement_sparsity/importance_threshold": -0.0020669983782781736, "compression/movement_sparsity/linear_layer_sparsity": 0.3573330936098672, "compression/movement_sparsity/model_sparsity": 0.3450576025591931, "compression_loss": 59.11058807373047, "distillation_loss": 1.1574065685272217, "epoch": 1.23, "learning_rate": 4.118463953628789e-05, "loss": 60.8124, "step": 1460, "task_loss": 1.5861769914627075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5504616429402704, "compression/movement_sparsity/importance_threshold": -0.0020601766069652657, "compression/movement_sparsity/linear_layer_sparsity": 0.3590755364539929, "compression/movement_sparsity/model_sparsity": 0.34674018713124194, "compression_loss": 59.2703742980957, "distillation_loss": 0.94568932056427, "epoch": 1.23, "learning_rate": 4.117860161816206e-05, "loss": 60.4035, "step": 1461, "task_loss": 1.5892330408096313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5519469006262585, "compression/movement_sparsity/importance_threshold": -0.002053369861574279, "compression/movement_sparsity/linear_layer_sparsity": 0.3609353488801585, "compression/movement_sparsity/model_sparsity": 0.3485361092791165, "compression_loss": 59.42977523803711, "distillation_loss": 2.290698528289795, "epoch": 1.24, "learning_rate": 4.117256370003623e-05, "loss": 60.9545, "step": 1462, "task_loss": 1.6836585998535156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5534288832139369, "compression/movement_sparsity/importance_threshold": -0.002046578125538595, "compression/movement_sparsity/linear_layer_sparsity": 0.36306121333461544, "compression/movement_sparsity/model_sparsity": 0.3505889437496388, "compression_loss": 59.588836669921875, "distillation_loss": 1.393168330192566, "epoch": 1.24, "learning_rate": 4.11665257819104e-05, "loss": 61.109, "step": 1463, "task_loss": 0.8987039923667908 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5549075943182042, "compression/movement_sparsity/importance_threshold": -0.0020398013822915963, "compression/movement_sparsity/linear_layer_sparsity": 0.3650813342704775, "compression/movement_sparsity/model_sparsity": 0.35253966731673514, "compression_loss": 59.74753189086914, "distillation_loss": 1.2937185764312744, "epoch": 1.24, "learning_rate": 4.1160487863784567e-05, "loss": 61.083, "step": 1464, "task_loss": 1.0093616247177124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5563830375539595, "compression/movement_sparsity/importance_threshold": -0.0020330396152666657, "compression/movement_sparsity/linear_layer_sparsity": 0.3668587864707822, "compression/movement_sparsity/model_sparsity": 0.3542560585658768, "compression_loss": 59.9058952331543, "distillation_loss": 1.082183837890625, "epoch": 1.24, "learning_rate": 4.115444994565874e-05, "loss": 61.4415, "step": 1465, "task_loss": 1.4385806322097778 }, { "compression/movement_sparsity/importance_regularization_factor": 0.557855216536101, "compression/movement_sparsity/importance_threshold": -0.0020262928078971865, "compression/movement_sparsity/linear_layer_sparsity": 0.3686067502045233, "compression/movement_sparsity/model_sparsity": 0.3559439743679985, "compression_loss": 60.06388473510742, "distillation_loss": 1.4611587524414062, "epoch": 1.24, "learning_rate": 4.114841202753291e-05, "loss": 61.3709, "step": 1466, "task_loss": 1.4987123012542725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5593241348795281, "compression/movement_sparsity/importance_threshold": -0.0020195609436165405, "compression/movement_sparsity/linear_layer_sparsity": 0.37050156006270013, "compression/movement_sparsity/model_sparsity": 0.35777369167843004, "compression_loss": 60.22148895263672, "distillation_loss": 1.0036077499389648, "epoch": 1.24, "learning_rate": 4.1142374109407075e-05, "loss": 61.5516, "step": 1467, "task_loss": 0.5925654172897339 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5607897961991393, "compression/movement_sparsity/importance_threshold": -0.0020128440058581115, "compression/movement_sparsity/linear_layer_sparsity": 0.37236164674472133, "compression/movement_sparsity/model_sparsity": 0.35956987866062784, "compression_loss": 60.37880325317383, "distillation_loss": 1.1443729400634766, "epoch": 1.24, "learning_rate": 4.113633619128125e-05, "loss": 61.6419, "step": 1468, "task_loss": 1.0155270099639893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5622522041098333, "compression/movement_sparsity/importance_threshold": -0.002006141978055281, "compression/movement_sparsity/linear_layer_sparsity": 0.3742859450694616, "compression/movement_sparsity/model_sparsity": 0.3614280714180792, "compression_loss": 60.535736083984375, "distillation_loss": 3.01296329498291, "epoch": 1.24, "learning_rate": 4.1130298273155416e-05, "loss": 62.3745, "step": 1469, "task_loss": 2.1380703449249268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5637113622265093, "compression/movement_sparsity/importance_threshold": -0.0019994548436414322, "compression/movement_sparsity/linear_layer_sparsity": 0.37603539932415725, "compression/movement_sparsity/model_sparsity": 0.36311742653717527, "compression_loss": 60.69234085083008, "distillation_loss": 1.5222046375274658, "epoch": 1.24, "learning_rate": 4.112426035502959e-05, "loss": 61.9903, "step": 1470, "task_loss": 1.059743881225586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5651672741640659, "compression/movement_sparsity/importance_threshold": -0.001992782586049947, "compression/movement_sparsity/linear_layer_sparsity": 0.3779820912357177, "compression/movement_sparsity/model_sparsity": 0.3649972435928488, "compression_loss": 60.848594665527344, "distillation_loss": 2.0615234375, "epoch": 1.24, "learning_rate": 4.111822243690376e-05, "loss": 62.7954, "step": 1471, "task_loss": 1.4905356168746948 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5666199435374015, "compression/movement_sparsity/importance_threshold": -0.00198612518871421, "compression/movement_sparsity/linear_layer_sparsity": 0.3797520431345794, "compression/movement_sparsity/model_sparsity": 0.3667063921989757, "compression_loss": 61.00452423095703, "distillation_loss": 1.2070536613464355, "epoch": 1.24, "learning_rate": 4.111218451877793e-05, "loss": 62.6008, "step": 1472, "task_loss": 0.6424928903579712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5680693739614154, "compression/movement_sparsity/importance_threshold": -0.0019794826350676025, "compression/movement_sparsity/linear_layer_sparsity": 0.3816491782054452, "compression/movement_sparsity/model_sparsity": 0.36853835484388714, "compression_loss": 61.160099029541016, "distillation_loss": 1.3064125776290894, "epoch": 1.24, "learning_rate": 4.11061466006521e-05, "loss": 62.5259, "step": 1473, "task_loss": 0.8117461204528809 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5695155690510064, "compression/movement_sparsity/importance_threshold": -0.001972854908543506, "compression/movement_sparsity/linear_layer_sparsity": 0.38331372046240586, "compression/movement_sparsity/model_sparsity": 0.3701457149535903, "compression_loss": 61.31534194946289, "distillation_loss": 1.2316553592681885, "epoch": 1.25, "learning_rate": 4.1100108682526265e-05, "loss": 62.5191, "step": 1474, "task_loss": 0.851533830165863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5709585324210729, "compression/movement_sparsity/importance_threshold": -0.0019662419925753063, "compression/movement_sparsity/linear_layer_sparsity": 0.38518096164500853, "compression/movement_sparsity/model_sparsity": 0.37194881065726476, "compression_loss": 61.47018814086914, "distillation_loss": 1.0875639915466309, "epoch": 1.25, "learning_rate": 4.109407076440044e-05, "loss": 62.594, "step": 1475, "task_loss": 0.90755295753479 }, { "compression/movement_sparsity/importance_regularization_factor": 0.572398267686514, "compression/movement_sparsity/importance_threshold": -0.0019596438705963832, "compression/movement_sparsity/linear_layer_sparsity": 0.3871732992702791, "compression/movement_sparsity/model_sparsity": 0.37387270535595984, "compression_loss": 61.624691009521484, "distillation_loss": 2.107943534851074, "epoch": 1.25, "learning_rate": 4.108803284627461e-05, "loss": 63.6651, "step": 1476, "task_loss": 2.273907423019409 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5738347784622286, "compression/movement_sparsity/importance_threshold": -0.0019530605260401198, "compression/movement_sparsity/linear_layer_sparsity": 0.38897505292422563, "compression/movement_sparsity/model_sparsity": 0.37561256322905073, "compression_loss": 61.778831481933594, "distillation_loss": 1.5331597328186035, "epoch": 1.25, "learning_rate": 4.1081994928148774e-05, "loss": 63.4757, "step": 1477, "task_loss": 0.9895449280738831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5752680683631151, "compression/movement_sparsity/importance_threshold": -0.0019464919423399008, "compression/movement_sparsity/linear_layer_sparsity": 0.3908193639324646, "compression/movement_sparsity/model_sparsity": 0.3773935164803924, "compression_loss": 61.932647705078125, "distillation_loss": 1.8296146392822266, "epoch": 1.25, "learning_rate": 4.107595701002295e-05, "loss": 63.189, "step": 1478, "task_loss": 0.9827434420585632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5766981410040727, "compression/movement_sparsity/importance_threshold": -0.0019399381029291062, "compression/movement_sparsity/linear_layer_sparsity": 0.39272481014817257, "compression/movement_sparsity/model_sparsity": 0.3792335047567526, "compression_loss": 62.08606719970703, "distillation_loss": 2.008141279220581, "epoch": 1.25, "learning_rate": 4.1069919091897115e-05, "loss": 63.7388, "step": 1479, "task_loss": 1.5566858053207397 }, { "compression/movement_sparsity/importance_regularization_factor": 0.578125, "compression/movement_sparsity/importance_threshold": -0.0019333989912411198, "compression/movement_sparsity/linear_layer_sparsity": 0.3945966182869798, "compression/movement_sparsity/model_sparsity": 0.38104101052763645, "compression_loss": 62.239158630371094, "distillation_loss": 1.2133147716522217, "epoch": 1.25, "learning_rate": 4.106388117377128e-05, "loss": 63.512, "step": 1480, "task_loss": 0.9936110973358154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5795486489657957, "compression/movement_sparsity/importance_threshold": -0.001926874590709325, "compression/movement_sparsity/linear_layer_sparsity": 0.3964702031267648, "compression/movement_sparsity/model_sparsity": 0.3828502319643537, "compression_loss": 62.391876220703125, "distillation_loss": 2.4016642570495605, "epoch": 1.25, "learning_rate": 4.1057843255645456e-05, "loss": 63.9915, "step": 1481, "task_loss": 2.427405595779419 }, { "compression/movement_sparsity/importance_regularization_factor": 0.580969091516359, "compression/movement_sparsity/importance_threshold": -0.0019203648847671028, "compression/movement_sparsity/linear_layer_sparsity": 0.3981740355160856, "compression/movement_sparsity/model_sparsity": 0.3844955324694998, "compression_loss": 62.54425811767578, "distillation_loss": 1.6436614990234375, "epoch": 1.25, "learning_rate": 4.105180533751963e-05, "loss": 63.9357, "step": 1482, "task_loss": 1.5809426307678223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.582386331266588, "compression/movement_sparsity/importance_threshold": -0.0019138698568478387, "compression/movement_sparsity/linear_layer_sparsity": 0.39992741282193345, "compression/movement_sparsity/model_sparsity": 0.38618867587087224, "compression_loss": 62.69633102416992, "distillation_loss": 1.3571672439575195, "epoch": 1.25, "learning_rate": 4.104576741939379e-05, "loss": 64.1701, "step": 1483, "task_loss": 2.2113211154937744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5838003718313822, "compression/movement_sparsity/importance_threshold": -0.0019073894903849122, "compression/movement_sparsity/linear_layer_sparsity": 0.40168652565241414, "compression/movement_sparsity/model_sparsity": 0.3878873577639619, "compression_loss": 62.84801483154297, "distillation_loss": 1.3512821197509766, "epoch": 1.25, "learning_rate": 4.1039729501267964e-05, "loss": 64.4337, "step": 1484, "task_loss": 0.9420333504676819 }, { "compression/movement_sparsity/importance_regularization_factor": 0.58521121682564, "compression/movement_sparsity/importance_threshold": -0.0019009237688117075, "compression/movement_sparsity/linear_layer_sparsity": 0.4034974728396079, "compression/movement_sparsity/model_sparsity": 0.3896360933441504, "compression_loss": 62.9993896484375, "distillation_loss": 1.409013271331787, "epoch": 1.26, "learning_rate": 4.103369158314214e-05, "loss": 65.2569, "step": 1485, "task_loss": 1.5180048942565918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5866188698642603, "compression/movement_sparsity/importance_threshold": -0.0018944726755616079, "compression/movement_sparsity/linear_layer_sparsity": 0.40525641873174173, "compression/movement_sparsity/model_sparsity": 0.3913346140337389, "compression_loss": 63.15041732788086, "distillation_loss": 1.4148757457733154, "epoch": 1.26, "learning_rate": 4.1027653665016306e-05, "loss": 64.6494, "step": 1486, "task_loss": 0.908077597618103 }, { "compression/movement_sparsity/importance_regularization_factor": 0.588023334562142, "compression/movement_sparsity/importance_threshold": -0.0018880361940679944, "compression/movement_sparsity/linear_layer_sparsity": 0.4070098318100925, "compression/movement_sparsity/model_sparsity": 0.39302779197871873, "compression_loss": 63.301109313964844, "distillation_loss": 1.2821612358093262, "epoch": 1.26, "learning_rate": 4.102161574689047e-05, "loss": 64.812, "step": 1487, "task_loss": 0.6698021292686462 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5894246145341837, "compression/movement_sparsity/importance_threshold": -0.0018816143077642505, "compression/movement_sparsity/linear_layer_sparsity": 0.4088464159577034, "compression/movement_sparsity/model_sparsity": 0.3948012838108655, "compression_loss": 63.451454162597656, "distillation_loss": 2.3698272705078125, "epoch": 1.26, "learning_rate": 4.101557782876465e-05, "loss": 65.0995, "step": 1488, "task_loss": 1.635695219039917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5908227133952841, "compression/movement_sparsity/importance_threshold": -0.00187520700008376, "compression/movement_sparsity/linear_layer_sparsity": 0.4106371993774249, "compression/movement_sparsity/model_sparsity": 0.3965305483110254, "compression_loss": 63.6014518737793, "distillation_loss": 1.8170621395111084, "epoch": 1.26, "learning_rate": 4.1009539910638814e-05, "loss": 65.4771, "step": 1489, "task_loss": 0.8675919771194458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5922176347603425, "compression/movement_sparsity/importance_threshold": -0.0018688142544599031, "compression/movement_sparsity/linear_layer_sparsity": 0.4123472084855811, "compression/movement_sparsity/model_sparsity": 0.3981818133457131, "compression_loss": 63.75110626220703, "distillation_loss": 1.74631929397583, "epoch": 1.26, "learning_rate": 4.100350199251298e-05, "loss": 65.1992, "step": 1490, "task_loss": 1.3837765455245972 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5936093822442572, "compression/movement_sparsity/importance_threshold": -0.0018624360543260648, "compression/movement_sparsity/linear_layer_sparsity": 0.4140209323516214, "compression/movement_sparsity/model_sparsity": 0.39979803964797805, "compression_loss": 63.900421142578125, "distillation_loss": 1.6118906736373901, "epoch": 1.26, "learning_rate": 4.0997464074387155e-05, "loss": 65.5723, "step": 1491, "task_loss": 0.735446035861969 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5949979594619271, "compression/movement_sparsity/importance_threshold": -0.0018560723831156272, "compression/movement_sparsity/linear_layer_sparsity": 0.4158927643387639, "compression/movement_sparsity/model_sparsity": 0.4016055684479335, "compression_loss": 64.04940032958984, "distillation_loss": 1.267628788948059, "epoch": 1.26, "learning_rate": 4.099142615626132e-05, "loss": 65.6439, "step": 1492, "task_loss": 1.2047793865203857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5963833700282513, "compression/movement_sparsity/importance_threshold": -0.0018497232242619716, "compression/movement_sparsity/linear_layer_sparsity": 0.41753936072343256, "compression/movement_sparsity/model_sparsity": 0.40319559918126585, "compression_loss": 64.19803619384766, "distillation_loss": 2.009300470352173, "epoch": 1.26, "learning_rate": 4.098538823813549e-05, "loss": 66.2305, "step": 1493, "task_loss": 2.6275837421417236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5977656175581283, "compression/movement_sparsity/importance_threshold": -0.0018433885611984818, "compression/movement_sparsity/linear_layer_sparsity": 0.4194638975315256, "compression/movement_sparsity/model_sparsity": 0.40505402222943315, "compression_loss": 64.3463134765625, "distillation_loss": 2.3168375492095947, "epoch": 1.26, "learning_rate": 4.097935032000966e-05, "loss": 66.3041, "step": 1494, "task_loss": 2.0208096504211426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.599144705666457, "compression/movement_sparsity/importance_threshold": -0.0018370683773585405, "compression/movement_sparsity/linear_layer_sparsity": 0.4213377923996691, "compression/movement_sparsity/model_sparsity": 0.406863543044081, "compression_loss": 64.49427032470703, "distillation_loss": 1.7509266138076782, "epoch": 1.26, "learning_rate": 4.097331240188384e-05, "loss": 65.9264, "step": 1495, "task_loss": 1.2533475160598755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.600520637968136, "compression/movement_sparsity/importance_threshold": -0.0018307626561755302, "compression/movement_sparsity/linear_layer_sparsity": 0.42311581696002026, "compression/movement_sparsity/model_sparsity": 0.4085804869909408, "compression_loss": 64.64186096191406, "distillation_loss": 2.404383897781372, "epoch": 1.26, "learning_rate": 4.0967274483758e-05, "loss": 66.5299, "step": 1496, "task_loss": 1.0763942003250122 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6018934180780646, "compression/movement_sparsity/importance_threshold": -0.0018244713810828326, "compression/movement_sparsity/linear_layer_sparsity": 0.42488461221462126, "compression/movement_sparsity/model_sparsity": 0.4102885186870956, "compression_loss": 64.78911590576172, "distillation_loss": 1.7674527168273926, "epoch": 1.27, "learning_rate": 4.096123656563217e-05, "loss": 66.5745, "step": 1497, "task_loss": 1.0365124940872192 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6032630496111409, "compression/movement_sparsity/importance_threshold": -0.0018181945355138333, "compression/movement_sparsity/linear_layer_sparsity": 0.4267076028111273, "compression/movement_sparsity/model_sparsity": 0.4120488839484366, "compression_loss": 64.9360122680664, "distillation_loss": 2.997147798538208, "epoch": 1.27, "learning_rate": 4.0955198647506346e-05, "loss": 67.1675, "step": 1498, "task_loss": 2.137388229370117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6046295361822644, "compression/movement_sparsity/importance_threshold": -0.0018119321029019106, "compression/movement_sparsity/linear_layer_sparsity": 0.42874998616796556, "compression/movement_sparsity/model_sparsity": 0.4140211051538613, "compression_loss": 65.08262634277344, "distillation_loss": 2.2842767238616943, "epoch": 1.27, "learning_rate": 4.0949160729380506e-05, "loss": 66.8633, "step": 1499, "task_loss": 1.2904726266860962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6059928814063333, "compression/movement_sparsity/importance_threshold": -0.0018056840666804512, "compression/movement_sparsity/linear_layer_sparsity": 0.43044396919481914, "compression/movement_sparsity/model_sparsity": 0.41565689465244116, "compression_loss": 65.22886657714844, "distillation_loss": 2.4954652786254883, "epoch": 1.27, "learning_rate": 4.094312281125468e-05, "loss": 67.4995, "step": 1500, "task_loss": 2.5950334072113037 }, { "epoch": 1.27, "eval_accuracy": 0.8034851485148515, "eval_loss": 66.4759750366211, "eval_runtime": 322.643, "eval_samples_per_second": 78.26, "eval_steps_per_second": 0.614, "step": 1500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6073530888982468, "compression/movement_sparsity/importance_threshold": -0.0017994504102828357, "compression/movement_sparsity/linear_layer_sparsity": 0.4321422806945246, "compression/movement_sparsity/model_sparsity": 0.4172968639275144, "compression_loss": 65.37484741210938, "distillation_loss": 1.3618648052215576, "epoch": 1.27, "learning_rate": 4.0937084893128854e-05, "loss": 67.339, "step": 1501, "task_loss": 1.8817410469055176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6087101622729034, "compression/movement_sparsity/importance_threshold": -0.0017932311171424474, "compression/movement_sparsity/linear_layer_sparsity": 0.4338695798457528, "compression/movement_sparsity/model_sparsity": 0.4189648250391042, "compression_loss": 65.52043914794922, "distillation_loss": 3.2040064334869385, "epoch": 1.27, "learning_rate": 4.093104697500302e-05, "loss": 67.4966, "step": 1502, "task_loss": 1.990301251411438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.610064105145202, "compression/movement_sparsity/importance_threshold": -0.001787026170692669, "compression/movement_sparsity/linear_layer_sparsity": 0.4357496991294022, "compression/movement_sparsity/model_sparsity": 0.42078035644143685, "compression_loss": 65.66569519042969, "distillation_loss": 1.322669506072998, "epoch": 1.27, "learning_rate": 4.092500905687719e-05, "loss": 67.2386, "step": 1503, "task_loss": 0.4933289885520935 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6114149211300417, "compression/movement_sparsity/importance_threshold": -0.001780835554366882, "compression/movement_sparsity/linear_layer_sparsity": 0.43755787990970446, "compression/movement_sparsity/model_sparsity": 0.422526420649321, "compression_loss": 65.81063842773438, "distillation_loss": 1.750702142715454, "epoch": 1.27, "learning_rate": 4.091897113875136e-05, "loss": 67.8444, "step": 1504, "task_loss": 0.5781861543655396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.612762613842321, "compression/movement_sparsity/importance_threshold": -0.00177465925159847, "compression/movement_sparsity/linear_layer_sparsity": 0.4393976358859065, "compression/movement_sparsity/model_sparsity": 0.4243029753479891, "compression_loss": 65.9552230834961, "distillation_loss": 2.2710812091827393, "epoch": 1.27, "learning_rate": 4.091293322062553e-05, "loss": 68.2226, "step": 1505, "task_loss": 1.0703234672546387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6141071868969384, "compression/movement_sparsity/importance_threshold": -0.0017684972458208172, "compression/movement_sparsity/linear_layer_sparsity": 0.4411666576996926, "compression/movement_sparsity/model_sparsity": 0.426011225820324, "compression_loss": 66.0994873046875, "distillation_loss": 1.38020658493042, "epoch": 1.27, "learning_rate": 4.09068953024997e-05, "loss": 67.7479, "step": 1506, "task_loss": 1.3242967128753662 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6154486439087934, "compression/movement_sparsity/importance_threshold": -0.0017623495204673032, "compression/movement_sparsity/linear_layer_sparsity": 0.4425788487969755, "compression/movement_sparsity/model_sparsity": 0.4273749038090073, "compression_loss": 66.24342346191406, "distillation_loss": 2.608217239379883, "epoch": 1.27, "learning_rate": 4.090085738437387e-05, "loss": 68.3962, "step": 1507, "task_loss": 1.2110309600830078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6167869884927843, "compression/movement_sparsity/importance_threshold": -0.0017562160589713123, "compression/movement_sparsity/linear_layer_sparsity": 0.44444390785690085, "compression/movement_sparsity/model_sparsity": 0.4291758923526314, "compression_loss": 66.38700866699219, "distillation_loss": 3.089149236679077, "epoch": 1.27, "learning_rate": 4.089481946624804e-05, "loss": 68.3816, "step": 1508, "task_loss": 2.354871988296509 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6181222242638098, "compression/movement_sparsity/importance_threshold": -0.0017500968447662288, "compression/movement_sparsity/linear_layer_sparsity": 0.4460748716577989, "compression/movement_sparsity/model_sparsity": 0.43075082752953714, "compression_loss": 66.53028106689453, "distillation_loss": 1.8339509963989258, "epoch": 1.28, "learning_rate": 4.0888781548122205e-05, "loss": 68.6592, "step": 1509, "task_loss": 1.001955270767212 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6194543548367692, "compression/movement_sparsity/importance_threshold": -0.0017439918612854317, "compression/movement_sparsity/linear_layer_sparsity": 0.44794152855618746, "compression/movement_sparsity/model_sparsity": 0.4325533590209577, "compression_loss": 66.67321014404297, "distillation_loss": 2.0169079303741455, "epoch": 1.28, "learning_rate": 4.088274362999638e-05, "loss": 68.5626, "step": 1510, "task_loss": 2.4557337760925293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6207833838265611, "compression/movement_sparsity/importance_threshold": -0.0017379010919623059, "compression/movement_sparsity/linear_layer_sparsity": 0.44970384898776217, "compression/movement_sparsity/model_sparsity": 0.43425513832417606, "compression_loss": 66.81582641601562, "distillation_loss": 1.8284318447113037, "epoch": 1.28, "learning_rate": 4.087670571187055e-05, "loss": 68.7804, "step": 1511, "task_loss": 1.2928149700164795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.622109314848084, "compression/movement_sparsity/importance_threshold": -0.0017318245202302342, "compression/movement_sparsity/linear_layer_sparsity": 0.4515493404885971, "compression/movement_sparsity/model_sparsity": 0.43603723151456136, "compression_loss": 66.95807647705078, "distillation_loss": 1.4912865161895752, "epoch": 1.28, "learning_rate": 4.087066779374472e-05, "loss": 68.2124, "step": 1512, "task_loss": 1.07431960105896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.623432151516237, "compression/movement_sparsity/importance_threshold": -0.0017257621295225984, "compression/movement_sparsity/linear_layer_sparsity": 0.45338338478850154, "compression/movement_sparsity/model_sparsity": 0.43780827075058387, "compression_loss": 67.09994506835938, "distillation_loss": 2.535898208618164, "epoch": 1.28, "learning_rate": 4.086462987561889e-05, "loss": 69.3906, "step": 1513, "task_loss": 1.2843260765075684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6247518974459187, "compression/movement_sparsity/importance_threshold": -0.001719713903272782, "compression/movement_sparsity/linear_layer_sparsity": 0.45525541948649384, "compression/movement_sparsity/model_sparsity": 0.43961599529764783, "compression_loss": 67.24154663085938, "distillation_loss": 2.7242279052734375, "epoch": 1.28, "learning_rate": 4.085859195749306e-05, "loss": 69.7073, "step": 1514, "task_loss": 1.1735221147537231 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6260685562520281, "compression/movement_sparsity/importance_threshold": -0.0017136798249141679, "compression/movement_sparsity/linear_layer_sparsity": 0.45721074449542265, "compression/movement_sparsity/model_sparsity": 0.4415041488772366, "compression_loss": 67.38272857666016, "distillation_loss": 2.8908286094665527, "epoch": 1.28, "learning_rate": 4.085255403936723e-05, "loss": 69.6381, "step": 1515, "task_loss": 1.0332131385803223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6273821315494641, "compression/movement_sparsity/importance_threshold": -0.001707659877880136, "compression/movement_sparsity/linear_layer_sparsity": 0.45880827293026977, "compression/movement_sparsity/model_sparsity": 0.4430467972957744, "compression_loss": 67.52365112304688, "distillation_loss": 2.142420768737793, "epoch": 1.28, "learning_rate": 4.0846516121241396e-05, "loss": 69.6951, "step": 1516, "task_loss": 1.6825485229492188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.628692626953125, "compression/movement_sparsity/importance_threshold": -0.001701654045604073, "compression/movement_sparsity/linear_layer_sparsity": 0.4603581643154116, "compression/movement_sparsity/model_sparsity": 0.44454344514381305, "compression_loss": 67.66422271728516, "distillation_loss": 1.714110255241394, "epoch": 1.28, "learning_rate": 4.084047820311557e-05, "loss": 69.5832, "step": 1517, "task_loss": 0.8341822028160095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6300000460779098, "compression/movement_sparsity/importance_threshold": -0.00169566231151936, "compression/movement_sparsity/linear_layer_sparsity": 0.4622096059758968, "compression/movement_sparsity/model_sparsity": 0.4463312840875598, "compression_loss": 67.804443359375, "distillation_loss": 2.3912081718444824, "epoch": 1.28, "learning_rate": 4.083444028498974e-05, "loss": 69.7878, "step": 1518, "task_loss": 0.7938804626464844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6313043925387178, "compression/movement_sparsity/importance_threshold": -0.0016896846590593773, "compression/movement_sparsity/linear_layer_sparsity": 0.46372553733161165, "compression/movement_sparsity/model_sparsity": 0.44779513853765557, "compression_loss": 67.94435119628906, "distillation_loss": 1.2643492221832275, "epoch": 1.28, "learning_rate": 4.0828402366863904e-05, "loss": 70.0081, "step": 1519, "task_loss": 0.4568856954574585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6326056699504472, "compression/movement_sparsity/importance_threshold": -0.001683721071657511, "compression/movement_sparsity/linear_layer_sparsity": 0.465375484407386, "compression/movement_sparsity/model_sparsity": 0.44938840485554615, "compression_loss": 68.08392333984375, "distillation_loss": 1.6867640018463135, "epoch": 1.28, "learning_rate": 4.082236444873808e-05, "loss": 70.3051, "step": 1520, "task_loss": 1.9919369220733643 }, { "compression/movement_sparsity/importance_regularization_factor": 0.633903881927997, "compression/movement_sparsity/importance_threshold": -0.0016777715327471422, "compression/movement_sparsity/linear_layer_sparsity": 0.4668819956706686, "compression/movement_sparsity/model_sparsity": 0.45084316282236425, "compression_loss": 68.22319030761719, "distillation_loss": 2.9687774181365967, "epoch": 1.29, "learning_rate": 4.0816326530612245e-05, "loss": 70.4316, "step": 1521, "task_loss": 2.6519293785095215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6351990320862659, "compression/movement_sparsity/importance_threshold": -0.0016718360257616536, "compression/movement_sparsity/linear_layer_sparsity": 0.46852865167617547, "compression/movement_sparsity/model_sparsity": 0.45243325112837557, "compression_loss": 68.36207580566406, "distillation_loss": 2.0835490226745605, "epoch": 1.29, "learning_rate": 4.081028861248642e-05, "loss": 70.2344, "step": 1522, "task_loss": 1.6220988035202026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6364911240401527, "compression/movement_sparsity/importance_threshold": -0.0016659145341344286, "compression/movement_sparsity/linear_layer_sparsity": 0.47006359015510196, "compression/movement_sparsity/model_sparsity": 0.4539154597485278, "compression_loss": 68.50068664550781, "distillation_loss": 2.7911367416381836, "epoch": 1.29, "learning_rate": 4.0804250694360586e-05, "loss": 70.5184, "step": 1523, "task_loss": 2.0097663402557373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6377801614045565, "compression/movement_sparsity/importance_threshold": -0.0016600070412988483, "compression/movement_sparsity/linear_layer_sparsity": 0.4715951421704198, "compression/movement_sparsity/model_sparsity": 0.4553943982405145, "compression_loss": 68.63899230957031, "distillation_loss": 2.5849769115448, "epoch": 1.29, "learning_rate": 4.079821277623475e-05, "loss": 70.6105, "step": 1524, "task_loss": 1.144644856452942 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6390661477943758, "compression/movement_sparsity/importance_threshold": -0.0016541135306882972, "compression/movement_sparsity/linear_layer_sparsity": 0.4732799674365291, "compression/movement_sparsity/model_sparsity": 0.4570213445756041, "compression_loss": 68.7769546508789, "distillation_loss": 2.142005205154419, "epoch": 1.29, "learning_rate": 4.079217485810893e-05, "loss": 70.5771, "step": 1525, "task_loss": 1.343470811843872 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6403490868245094, "compression/movement_sparsity/importance_threshold": -0.0016482339857361566, "compression/movement_sparsity/linear_layer_sparsity": 0.47482863063240444, "compression/movement_sparsity/model_sparsity": 0.45851680642645587, "compression_loss": 68.91455078125, "distillation_loss": 1.9053081274032593, "epoch": 1.29, "learning_rate": 4.0786136939983095e-05, "loss": 70.6562, "step": 1526, "task_loss": 1.1693254709243774 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6416289821098562, "compression/movement_sparsity/importance_threshold": -0.00164236838987581, "compression/movement_sparsity/linear_layer_sparsity": 0.47662350404162507, "compression/movement_sparsity/model_sparsity": 0.4602500204123933, "compression_loss": 69.0518569946289, "distillation_loss": 3.219198226928711, "epoch": 1.29, "learning_rate": 4.078009902185727e-05, "loss": 71.6091, "step": 1527, "task_loss": 1.846290946006775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.642905837265315, "compression/movement_sparsity/importance_threshold": -0.0016365167265406395, "compression/movement_sparsity/linear_layer_sparsity": 0.4785811900357458, "compression/movement_sparsity/model_sparsity": 0.46214045387006947, "compression_loss": 69.18885040283203, "distillation_loss": 2.374037742614746, "epoch": 1.29, "learning_rate": 4.0774061103731436e-05, "loss": 71.7983, "step": 1528, "task_loss": 0.9507524967193604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6441796559057844, "compression/movement_sparsity/importance_threshold": -0.0016306789791640285, "compression/movement_sparsity/linear_layer_sparsity": 0.48018036400572667, "compression/movement_sparsity/model_sparsity": 0.4636846912945469, "compression_loss": 69.32549285888672, "distillation_loss": 1.888604760169983, "epoch": 1.29, "learning_rate": 4.07680231856056e-05, "loss": 71.4122, "step": 1529, "task_loss": 2.4681990146636963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6454504416461636, "compression/movement_sparsity/importance_threshold": -0.0016248551311793583, "compression/movement_sparsity/linear_layer_sparsity": 0.48178852879810496, "compression/movement_sparsity/model_sparsity": 0.4652376106790134, "compression_loss": 69.46179962158203, "distillation_loss": 1.673600673675537, "epoch": 1.29, "learning_rate": 4.076198526747978e-05, "loss": 71.0703, "step": 1530, "task_loss": 1.0627106428146362 }, { "compression/movement_sparsity/importance_regularization_factor": 0.646718198101351, "compression/movement_sparsity/importance_threshold": -0.0016190451660200132, "compression/movement_sparsity/linear_layer_sparsity": 0.48336758669728414, "compression/movement_sparsity/model_sparsity": 0.4667624230816055, "compression_loss": 69.5978012084961, "distillation_loss": 2.0809035301208496, "epoch": 1.29, "learning_rate": 4.0755947349353944e-05, "loss": 71.9862, "step": 1531, "task_loss": 0.8997868299484253 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6479829288862455, "compression/movement_sparsity/importance_threshold": -0.0016132490671193754, "compression/movement_sparsity/linear_layer_sparsity": 0.48510428209260936, "compression/movement_sparsity/model_sparsity": 0.4684394576474014, "compression_loss": 69.7333984375, "distillation_loss": 2.285703420639038, "epoch": 1.29, "learning_rate": 4.074990943122812e-05, "loss": 71.4123, "step": 1532, "task_loss": 1.6942522525787354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6492446376157461, "compression/movement_sparsity/importance_threshold": -0.0016074668179108264, "compression/movement_sparsity/linear_layer_sparsity": 0.4868974264975228, "compression/movement_sparsity/model_sparsity": 0.47017100202564865, "compression_loss": 69.86862182617188, "distillation_loss": 2.382129430770874, "epoch": 1.3, "learning_rate": 4.0743871513102285e-05, "loss": 72.4544, "step": 1533, "task_loss": 1.6816794872283936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6505033279047514, "compression/movement_sparsity/importance_threshold": -0.0016016984018277503, "compression/movement_sparsity/linear_layer_sparsity": 0.488613528855341, "compression/movement_sparsity/model_sparsity": 0.47182815098812736, "compression_loss": 70.00361633300781, "distillation_loss": 2.4940595626831055, "epoch": 1.3, "learning_rate": 4.073783359497645e-05, "loss": 71.9411, "step": 1534, "task_loss": 1.8234903812408447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6517590033681602, "compression/movement_sparsity/importance_threshold": -0.001595943802303529, "compression/movement_sparsity/linear_layer_sparsity": 0.49027778493227836, "compression/movement_sparsity/model_sparsity": 0.4734352347489714, "compression_loss": 70.13825225830078, "distillation_loss": 2.0523476600646973, "epoch": 1.3, "learning_rate": 4.0731795676850626e-05, "loss": 71.8683, "step": 1535, "task_loss": 1.0100599527359009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6530116676208715, "compression/movement_sparsity/importance_threshold": -0.0015902030027715448, "compression/movement_sparsity/linear_layer_sparsity": 0.49208045674713285, "compression/movement_sparsity/model_sparsity": 0.47517597924131855, "compression_loss": 70.27252960205078, "distillation_loss": 2.6383814811706543, "epoch": 1.3, "learning_rate": 4.0725757758724793e-05, "loss": 72.484, "step": 1536, "task_loss": 2.095336437225342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6542613242777837, "compression/movement_sparsity/importance_threshold": -0.001584475986665182, "compression/movement_sparsity/linear_layer_sparsity": 0.4938860976797287, "compression/movement_sparsity/model_sparsity": 0.47691959085307845, "compression_loss": 70.40656280517578, "distillation_loss": 3.0288615226745605, "epoch": 1.3, "learning_rate": 4.071971984059896e-05, "loss": 72.82, "step": 1537, "task_loss": 2.09696626663208 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6555079769537959, "compression/movement_sparsity/importance_threshold": -0.0015787627374178217, "compression/movement_sparsity/linear_layer_sparsity": 0.49552646964889147, "compression/movement_sparsity/model_sparsity": 0.47850361099872607, "compression_loss": 70.54019927978516, "distillation_loss": 2.992490291595459, "epoch": 1.3, "learning_rate": 4.0713681922473135e-05, "loss": 73.1134, "step": 1538, "task_loss": 2.076934337615967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6567516292638069, "compression/movement_sparsity/importance_threshold": -0.001573063238462847, "compression/movement_sparsity/linear_layer_sparsity": 0.497490809328553, "compression/movement_sparsity/model_sparsity": 0.48040046956737553, "compression_loss": 70.67351531982422, "distillation_loss": 2.202881336212158, "epoch": 1.3, "learning_rate": 4.07076440043473e-05, "loss": 73.2225, "step": 1539, "task_loss": 1.1624072790145874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6579922848227153, "compression/movement_sparsity/importance_threshold": -0.001567377473233641, "compression/movement_sparsity/linear_layer_sparsity": 0.49904648393499823, "compression/movement_sparsity/model_sparsity": 0.48190270196527446, "compression_loss": 70.80653381347656, "distillation_loss": 2.9827561378479004, "epoch": 1.3, "learning_rate": 4.070160608622147e-05, "loss": 73.508, "step": 1540, "task_loss": 1.728935956954956 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6592299472454202, "compression/movement_sparsity/importance_threshold": -0.0015617054251635858, "compression/movement_sparsity/linear_layer_sparsity": 0.5005965064859841, "compression/movement_sparsity/model_sparsity": 0.48339947647320686, "compression_loss": 70.93925476074219, "distillation_loss": 2.506899118423462, "epoch": 1.3, "learning_rate": 4.069556816809564e-05, "loss": 73.1221, "step": 1541, "task_loss": 1.3512152433395386 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6604646201468201, "compression/movement_sparsity/importance_threshold": -0.0015560470776860647, "compression/movement_sparsity/linear_layer_sparsity": 0.502389960919256, "compression/movement_sparsity/model_sparsity": 0.48513132022938477, "compression_loss": 71.07158660888672, "distillation_loss": 2.8874993324279785, "epoch": 1.3, "learning_rate": 4.068953024996982e-05, "loss": 73.6739, "step": 1542, "task_loss": 2.066234588623047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6616963071418138, "compression/movement_sparsity/importance_threshold": -0.0015504024142344599, "compression/movement_sparsity/linear_layer_sparsity": 0.5040302732675807, "compression/movement_sparsity/model_sparsity": 0.4867152828023534, "compression_loss": 71.20362091064453, "distillation_loss": 5.933948993682861, "epoch": 1.3, "learning_rate": 4.0683492331843984e-05, "loss": 74.7576, "step": 1543, "task_loss": 2.527026653289795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6629250118453004, "compression/movement_sparsity/importance_threshold": -0.001544771418242154, "compression/movement_sparsity/linear_layer_sparsity": 0.5055419596196171, "compression/movement_sparsity/model_sparsity": 0.4881750380777063, "compression_loss": 71.33536529541016, "distillation_loss": 3.4576711654663086, "epoch": 1.3, "learning_rate": 4.067745441371815e-05, "loss": 74.0595, "step": 1544, "task_loss": 2.6623458862304688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6641507378721785, "compression/movement_sparsity/importance_threshold": -0.0015391540731425294, "compression/movement_sparsity/linear_layer_sparsity": 0.5069400682749221, "compression/movement_sparsity/model_sparsity": 0.4895251173996162, "compression_loss": 71.46675872802734, "distillation_loss": 1.8407223224639893, "epoch": 1.31, "learning_rate": 4.0671416495592325e-05, "loss": 73.8632, "step": 1545, "task_loss": 0.7983624935150146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6653734888373467, "compression/movement_sparsity/importance_threshold": -0.0015335503623689697, "compression/movement_sparsity/linear_layer_sparsity": 0.508401673122888, "compression/movement_sparsity/model_sparsity": 0.49093651162463203, "compression_loss": 71.59788513183594, "distillation_loss": 2.3952512741088867, "epoch": 1.31, "learning_rate": 4.066537857746649e-05, "loss": 73.7952, "step": 1546, "task_loss": 1.3085087537765503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6665932683557042, "compression/movement_sparsity/importance_threshold": -0.0015279602693548566, "compression/movement_sparsity/linear_layer_sparsity": 0.5101598081716225, "compression/movement_sparsity/model_sparsity": 0.49263424932578653, "compression_loss": 71.72866821289062, "distillation_loss": 2.6156041622161865, "epoch": 1.31, "learning_rate": 4.065934065934066e-05, "loss": 74.3482, "step": 1547, "task_loss": 1.6856050491333008 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6678100800421496, "compression/movement_sparsity/importance_threshold": -0.0015223837775335735, "compression/movement_sparsity/linear_layer_sparsity": 0.5118381824630408, "compression/movement_sparsity/model_sparsity": 0.4942549662970113, "compression_loss": 71.8591079711914, "distillation_loss": 3.463369607925415, "epoch": 1.31, "learning_rate": 4.0653302741214834e-05, "loss": 74.5622, "step": 1548, "task_loss": 2.485182046890259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6690239275115815, "compression/movement_sparsity/importance_threshold": -0.0015168208703385034, "compression/movement_sparsity/linear_layer_sparsity": 0.5132960669707043, "compression/movement_sparsity/model_sparsity": 0.49566276798685927, "compression_loss": 71.98922729492188, "distillation_loss": 2.4372365474700928, "epoch": 1.31, "learning_rate": 4.0647264823089e-05, "loss": 74.2313, "step": 1549, "task_loss": 1.101418375968933 }, { "compression/movement_sparsity/importance_regularization_factor": 0.670234814378899, "compression/movement_sparsity/importance_threshold": -0.001511271531203028, "compression/movement_sparsity/linear_layer_sparsity": 0.5147583634203932, "compression/movement_sparsity/model_sparsity": 0.4970748300549512, "compression_loss": 72.11900329589844, "distillation_loss": 2.409860134124756, "epoch": 1.31, "learning_rate": 4.064122690496317e-05, "loss": 74.4355, "step": 1550, "task_loss": 1.2888054847717285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.671442744259001, "compression/movement_sparsity/importance_threshold": -0.001505735743560529, "compression/movement_sparsity/linear_layer_sparsity": 0.5164189468536987, "compression/movement_sparsity/model_sparsity": 0.4986783673387705, "compression_loss": 72.24850463867188, "distillation_loss": 2.3860180377960205, "epoch": 1.31, "learning_rate": 4.063518898683734e-05, "loss": 75.0297, "step": 1551, "task_loss": 1.0986566543579102 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6726477207667857, "compression/movement_sparsity/importance_threshold": -0.0015002134908443923, "compression/movement_sparsity/linear_layer_sparsity": 0.5180920983596924, "compression/movement_sparsity/model_sparsity": 0.5002940409433173, "compression_loss": 72.37773132324219, "distillation_loss": 2.3349266052246094, "epoch": 1.31, "learning_rate": 4.0629151068711516e-05, "loss": 74.9249, "step": 1552, "task_loss": 0.6151782870292664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6738497475171527, "compression/movement_sparsity/importance_threshold": -0.0014947047564879967, "compression/movement_sparsity/linear_layer_sparsity": 0.5197754331048472, "compression/movement_sparsity/model_sparsity": 0.5019195479614326, "compression_loss": 72.5064697265625, "distillation_loss": 3.3644208908081055, "epoch": 1.31, "learning_rate": 4.0623113150585676e-05, "loss": 75.03, "step": 1553, "task_loss": 1.9200294017791748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.675048828125, "compression/movement_sparsity/importance_threshold": -0.0014892095239247283, "compression/movement_sparsity/linear_layer_sparsity": 0.5213604650120119, "compression/movement_sparsity/model_sparsity": 0.5034501291464577, "compression_loss": 72.63496398925781, "distillation_loss": 2.3354477882385254, "epoch": 1.31, "learning_rate": 4.061707523245985e-05, "loss": 74.9513, "step": 1554, "task_loss": 2.2495973110198975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6762449662052268, "compression/movement_sparsity/importance_threshold": -0.0014837277765879686, "compression/movement_sparsity/linear_layer_sparsity": 0.5230621152786554, "compression/movement_sparsity/model_sparsity": 0.5050933224915535, "compression_loss": 72.76316833496094, "distillation_loss": 1.5766487121582031, "epoch": 1.31, "learning_rate": 4.0611037314334024e-05, "loss": 74.63, "step": 1555, "task_loss": 0.8907100558280945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6774381653727322, "compression/movement_sparsity/importance_threshold": -0.0014782594979110982, "compression/movement_sparsity/linear_layer_sparsity": 0.5247789688590346, "compression/movement_sparsity/model_sparsity": 0.5067511968697873, "compression_loss": 72.89103698730469, "distillation_loss": 3.0848073959350586, "epoch": 1.32, "learning_rate": 4.0604999396208185e-05, "loss": 75.0904, "step": 1556, "task_loss": 1.9270751476287842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6786284292424143, "compression/movement_sparsity/importance_threshold": -0.001472804671327503, "compression/movement_sparsity/linear_layer_sparsity": 0.5263793948666172, "compression/movement_sparsity/model_sparsity": 0.5082966433205232, "compression_loss": 73.01856231689453, "distillation_loss": 3.453660488128662, "epoch": 1.32, "learning_rate": 4.059896147808236e-05, "loss": 75.8136, "step": 1557, "task_loss": 2.2347285747528076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6798157614291725, "compression/movement_sparsity/importance_threshold": -0.001467363280270563, "compression/movement_sparsity/linear_layer_sparsity": 0.5280151283345697, "compression/movement_sparsity/model_sparsity": 0.5098761843117466, "compression_loss": 73.14575958251953, "distillation_loss": 2.815380096435547, "epoch": 1.32, "learning_rate": 4.059292355995653e-05, "loss": 75.7952, "step": 1558, "task_loss": 0.9889405965805054 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6810001655479054, "compression/movement_sparsity/importance_threshold": -0.0014619353081736625, "compression/movement_sparsity/linear_layer_sparsity": 0.5296684857222879, "compression/movement_sparsity/model_sparsity": 0.5114727437868745, "compression_loss": 73.27266693115234, "distillation_loss": 1.9708774089813232, "epoch": 1.32, "learning_rate": 4.05868856418307e-05, "loss": 76.0325, "step": 1559, "task_loss": 1.7438136339187622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6821816452135114, "compression/movement_sparsity/importance_threshold": -0.001456520738470185, "compression/movement_sparsity/linear_layer_sparsity": 0.5313475754637643, "compression/movement_sparsity/model_sparsity": 0.513094151630247, "compression_loss": 73.39927673339844, "distillation_loss": 3.6250479221343994, "epoch": 1.32, "learning_rate": 4.058084772370487e-05, "loss": 77.0917, "step": 1560, "task_loss": 2.6081273555755615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.68336020404089, "compression/movement_sparsity/importance_threshold": -0.0014511195545935103, "compression/movement_sparsity/linear_layer_sparsity": 0.5330613287605581, "compression/movement_sparsity/model_sparsity": 0.5147490322291741, "compression_loss": 73.52556610107422, "distillation_loss": 4.573336601257324, "epoch": 1.32, "learning_rate": 4.057480980557904e-05, "loss": 77.0224, "step": 1561, "task_loss": 3.017275333404541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6845358456449396, "compression/movement_sparsity/importance_threshold": -0.0014457317399770232, "compression/movement_sparsity/linear_layer_sparsity": 0.5346219518965722, "compression/movement_sparsity/model_sparsity": 0.5162560431594279, "compression_loss": 73.65158081054688, "distillation_loss": 1.9310706853866577, "epoch": 1.32, "learning_rate": 4.056877188745321e-05, "loss": 76.1081, "step": 1562, "task_loss": 1.4165972471237183 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6857085736405589, "compression/movement_sparsity/importance_threshold": -0.0014403572780541064, "compression/movement_sparsity/linear_layer_sparsity": 0.5362206131273448, "compression/movement_sparsity/model_sparsity": 0.5177997854588662, "compression_loss": 73.77727508544922, "distillation_loss": 2.0768113136291504, "epoch": 1.32, "learning_rate": 4.0562733969327375e-05, "loss": 76.9937, "step": 1563, "task_loss": 1.1387596130371094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6868783916426471, "compression/movement_sparsity/importance_threshold": -0.0014349961522581406, "compression/movement_sparsity/linear_layer_sparsity": 0.5378460798869628, "compression/movement_sparsity/model_sparsity": 0.5193694124347706, "compression_loss": 73.90264892578125, "distillation_loss": 2.0928730964660645, "epoch": 1.32, "learning_rate": 4.055669605120155e-05, "loss": 76.2278, "step": 1564, "task_loss": 1.2993205785751343 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6880453032661026, "compression/movement_sparsity/importance_threshold": -0.001429648346022511, "compression/movement_sparsity/linear_layer_sparsity": 0.5393599602858442, "compression/movement_sparsity/model_sparsity": 0.5208312863847097, "compression_loss": 74.02771759033203, "distillation_loss": 2.0474445819854736, "epoch": 1.32, "learning_rate": 4.0550658133075716e-05, "loss": 76.3389, "step": 1565, "task_loss": 1.0505938529968262 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6892093121258241, "compression/movement_sparsity/importance_threshold": -0.0014243138427805992, "compression/movement_sparsity/linear_layer_sparsity": 0.5408692498801859, "compression/movement_sparsity/model_sparsity": 0.522288727238368, "compression_loss": 74.15248107910156, "distillation_loss": 2.4622671604156494, "epoch": 1.32, "learning_rate": 4.0544620214949883e-05, "loss": 77.5324, "step": 1566, "task_loss": 1.2950594425201416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.690370421836711, "compression/movement_sparsity/importance_threshold": -0.001418992625965787, "compression/movement_sparsity/linear_layer_sparsity": 0.5423607247680797, "compression/movement_sparsity/model_sparsity": 0.523728965375549, "compression_loss": 74.27690124511719, "distillation_loss": 3.7175450325012207, "epoch": 1.32, "learning_rate": 4.053858229682406e-05, "loss": 76.8871, "step": 1567, "task_loss": 1.9874897003173828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6915286360136617, "compression/movement_sparsity/importance_threshold": -0.0014136846790114579, "compression/movement_sparsity/linear_layer_sparsity": 0.5438376640956253, "compression/movement_sparsity/model_sparsity": 0.5251551672935967, "compression_loss": 74.4010238647461, "distillation_loss": 3.6326699256896973, "epoch": 1.33, "learning_rate": 4.053254437869823e-05, "loss": 77.1658, "step": 1568, "task_loss": 2.191239356994629 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6926839582715747, "compression/movement_sparsity/importance_threshold": -0.001408389985350996, "compression/movement_sparsity/linear_layer_sparsity": 0.5455743237184477, "compression/movement_sparsity/model_sparsity": 0.5268321673157852, "compression_loss": 74.52486419677734, "distillation_loss": 3.2507877349853516, "epoch": 1.33, "learning_rate": 4.052650646057239e-05, "loss": 77.8213, "step": 1569, "task_loss": 1.8562309741973877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6938363922253494, "compression/movement_sparsity/importance_threshold": -0.001403108528417781, "compression/movement_sparsity/linear_layer_sparsity": 0.5468445695241932, "compression/movement_sparsity/model_sparsity": 0.5280587762703702, "compression_loss": 74.64835357666016, "distillation_loss": 3.6294198036193848, "epoch": 1.33, "learning_rate": 4.0520468542446566e-05, "loss": 77.9981, "step": 1570, "task_loss": 2.0455856323242188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6949859414898842, "compression/movement_sparsity/importance_threshold": -0.0013978402916451976, "compression/movement_sparsity/linear_layer_sparsity": 0.5484132176250264, "compression/movement_sparsity/model_sparsity": 0.5295735364832137, "compression_loss": 74.7715835571289, "distillation_loss": 4.076459884643555, "epoch": 1.33, "learning_rate": 4.051443062432074e-05, "loss": 77.6708, "step": 1571, "task_loss": 1.976843237876892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6961326096800781, "compression/movement_sparsity/importance_threshold": -0.0013925852584666279, "compression/movement_sparsity/linear_layer_sparsity": 0.5499194427082856, "compression/movement_sparsity/model_sparsity": 0.5310280181011727, "compression_loss": 74.89442443847656, "distillation_loss": 2.325430393218994, "epoch": 1.33, "learning_rate": 4.050839270619491e-05, "loss": 77.7206, "step": 1572, "task_loss": 1.0518248081207275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6972764004108299, "compression/movement_sparsity/importance_threshold": -0.0013873434123154536, "compression/movement_sparsity/linear_layer_sparsity": 0.5516443570259867, "compression/movement_sparsity/model_sparsity": 0.5326936763056036, "compression_loss": 75.01702117919922, "distillation_loss": 2.943516731262207, "epoch": 1.33, "learning_rate": 4.0502354788069074e-05, "loss": 78.2388, "step": 1573, "task_loss": 1.661414623260498 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6984173172970382, "compression/movement_sparsity/importance_threshold": -0.0013821147366250598, "compression/movement_sparsity/linear_layer_sparsity": 0.5532052544178564, "compression/movement_sparsity/model_sparsity": 0.5342009520701806, "compression_loss": 75.13928985595703, "distillation_loss": 2.5520753860473633, "epoch": 1.33, "learning_rate": 4.049631686994325e-05, "loss": 77.2638, "step": 1574, "task_loss": 1.2336891889572144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.699555363953602, "compression/movement_sparsity/importance_threshold": -0.001376899214828827, "compression/movement_sparsity/linear_layer_sparsity": 0.554708259975854, "compression/movement_sparsity/model_sparsity": 0.535652324763475, "compression_loss": 75.2612075805664, "distillation_loss": 3.1821746826171875, "epoch": 1.33, "learning_rate": 4.0490278951817415e-05, "loss": 78.3764, "step": 1575, "task_loss": 0.9213689565658569 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7006905439954199, "compression/movement_sparsity/importance_threshold": -0.0013716968303601387, "compression/movement_sparsity/linear_layer_sparsity": 0.5563192627201297, "compression/movement_sparsity/model_sparsity": 0.5372079846074607, "compression_loss": 75.38291931152344, "distillation_loss": 5.210402011871338, "epoch": 1.33, "learning_rate": 4.048424103369158e-05, "loss": 78.6591, "step": 1576, "task_loss": 2.731072425842285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7018228610373907, "compression/movement_sparsity/importance_threshold": -0.0013665075666523781, "compression/movement_sparsity/linear_layer_sparsity": 0.5576681245630984, "compression/movement_sparsity/model_sparsity": 0.5385105088965392, "compression_loss": 75.5042953491211, "distillation_loss": 2.962397575378418, "epoch": 1.33, "learning_rate": 4.0478203115565756e-05, "loss": 79.0451, "step": 1577, "task_loss": 1.7134435176849365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7029523186944135, "compression/movement_sparsity/importance_threshold": -0.0013613314071389262, "compression/movement_sparsity/linear_layer_sparsity": 0.5590602472862503, "compression/movement_sparsity/model_sparsity": 0.5398548079214802, "compression_loss": 75.62535095214844, "distillation_loss": 3.2340426445007324, "epoch": 1.33, "learning_rate": 4.0472165197439924e-05, "loss": 78.7083, "step": 1578, "task_loss": 1.1052370071411133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7040789205813869, "compression/movement_sparsity/importance_threshold": -0.0013561683352531673, "compression/movement_sparsity/linear_layer_sparsity": 0.5605255128536804, "compression/movement_sparsity/model_sparsity": 0.541269737108985, "compression_loss": 75.7461166381836, "distillation_loss": 2.7094974517822266, "epoch": 1.33, "learning_rate": 4.046612727931409e-05, "loss": 78.2599, "step": 1579, "task_loss": 2.2124032974243164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7052026703132095, "compression/movement_sparsity/importance_threshold": -0.0013510183344284839, "compression/movement_sparsity/linear_layer_sparsity": 0.5619459197024643, "compression/movement_sparsity/model_sparsity": 0.5426413486128306, "compression_loss": 75.86653137207031, "distillation_loss": 3.5356016159057617, "epoch": 1.34, "learning_rate": 4.0460089361188265e-05, "loss": 79.2272, "step": 1580, "task_loss": 1.2559728622436523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7063235715047804, "compression/movement_sparsity/importance_threshold": -0.0013458813880982577, "compression/movement_sparsity/linear_layer_sparsity": 0.563343479846058, "compression/movement_sparsity/model_sparsity": 0.543990898266094, "compression_loss": 75.98665618896484, "distillation_loss": 2.501697301864624, "epoch": 1.34, "learning_rate": 4.045405144306243e-05, "loss": 78.8303, "step": 1581, "task_loss": 1.6460388898849487 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7074416277709983, "compression/movement_sparsity/importance_threshold": -0.0013407574796958717, "compression/movement_sparsity/linear_layer_sparsity": 0.5646555914043732, "compression/movement_sparsity/model_sparsity": 0.5452579347558537, "compression_loss": 76.10639953613281, "distillation_loss": 3.548956871032715, "epoch": 1.34, "learning_rate": 4.0448013524936606e-05, "loss": 79.238, "step": 1582, "task_loss": 2.14089298248291 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7085568427267619, "compression/movement_sparsity/importance_threshold": -0.0013356465926547094, "compression/movement_sparsity/linear_layer_sparsity": 0.5662896435646889, "compression/movement_sparsity/model_sparsity": 0.5468358521975302, "compression_loss": 76.22590637207031, "distillation_loss": 2.807504892349243, "epoch": 1.34, "learning_rate": 4.044197560681077e-05, "loss": 78.8801, "step": 1583, "task_loss": 1.8368595838546753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7096692199869701, "compression/movement_sparsity/importance_threshold": -0.0013305487104081529, "compression/movement_sparsity/linear_layer_sparsity": 0.5679238745875191, "compression/movement_sparsity/model_sparsity": 0.5484139423572436, "compression_loss": 76.34514617919922, "distillation_loss": 3.3720178604125977, "epoch": 1.34, "learning_rate": 4.043593768868495e-05, "loss": 79.7643, "step": 1584, "task_loss": 3.2723031044006348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7107787631665219, "compression/movement_sparsity/importance_threshold": -0.0013254638163895838, "compression/movement_sparsity/linear_layer_sparsity": 0.5694741833185283, "compression/movement_sparsity/model_sparsity": 0.549910993214035, "compression_loss": 76.46407318115234, "distillation_loss": 4.017096519470215, "epoch": 1.34, "learning_rate": 4.0429899770559114e-05, "loss": 79.7398, "step": 1585, "task_loss": 1.9686932563781738 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7118854758803155, "compression/movement_sparsity/importance_threshold": -0.0013203918940323874, "compression/movement_sparsity/linear_layer_sparsity": 0.5708972731050304, "compression/movement_sparsity/model_sparsity": 0.5512851954884345, "compression_loss": 76.58258819580078, "distillation_loss": 3.8623080253601074, "epoch": 1.34, "learning_rate": 4.042386185243328e-05, "loss": 80.2716, "step": 1586, "task_loss": 1.984721302986145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7129893617432503, "compression/movement_sparsity/importance_threshold": -0.0013153329267699431, "compression/movement_sparsity/linear_layer_sparsity": 0.5724575742885183, "compression/movement_sparsity/model_sparsity": 0.5527918955262218, "compression_loss": 76.70094299316406, "distillation_loss": 3.126573085784912, "epoch": 1.34, "learning_rate": 4.0417823934307455e-05, "loss": 79.1811, "step": 1587, "task_loss": 1.1535747051239014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7140904243702249, "compression/movement_sparsity/importance_threshold": -0.0013102868980356358, "compression/movement_sparsity/linear_layer_sparsity": 0.574006845616943, "compression/movement_sparsity/model_sparsity": 0.554287944618399, "compression_loss": 76.81893157958984, "distillation_loss": 2.5346484184265137, "epoch": 1.34, "learning_rate": 4.041178601618162e-05, "loss": 79.3375, "step": 1588, "task_loss": 1.3333731889724731 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7151886673761378, "compression/movement_sparsity/importance_threshold": -0.0013052537912628484, "compression/movement_sparsity/linear_layer_sparsity": 0.575348696049342, "compression/movement_sparsity/model_sparsity": 0.5555836983604303, "compression_loss": 76.93657684326172, "distillation_loss": 3.5370612144470215, "epoch": 1.34, "learning_rate": 4.040574809805579e-05, "loss": 79.9341, "step": 1589, "task_loss": 1.2191483974456787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7162840943758886, "compression/movement_sparsity/importance_threshold": -0.0013002335898849608, "compression/movement_sparsity/linear_layer_sparsity": 0.5766538558179254, "compression/movement_sparsity/model_sparsity": 0.5568440218758217, "compression_loss": 77.05391693115234, "distillation_loss": 3.713824510574341, "epoch": 1.34, "learning_rate": 4.0399710179929964e-05, "loss": 80.4986, "step": 1590, "task_loss": 2.3262598514556885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.717376708984375, "compression/movement_sparsity/importance_threshold": -0.0012952262773353596, "compression/movement_sparsity/linear_layer_sparsity": 0.5781355648125254, "compression/movement_sparsity/model_sparsity": 0.5582748296081872, "compression_loss": 77.17098999023438, "distillation_loss": 2.8069818019866943, "epoch": 1.34, "learning_rate": 4.039367226180413e-05, "loss": 80.4422, "step": 1591, "task_loss": 2.0016496181488037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7184665148164965, "compression/movement_sparsity/importance_threshold": -0.001290231837047425, "compression/movement_sparsity/linear_layer_sparsity": 0.579621673824983, "compression/movement_sparsity/model_sparsity": 0.5597098862042608, "compression_loss": 77.28773498535156, "distillation_loss": 3.4514570236206055, "epoch": 1.35, "learning_rate": 4.03876343436783e-05, "loss": 80.2721, "step": 1592, "task_loss": 2.073960542678833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.719553515487152, "compression/movement_sparsity/importance_threshold": -0.0012852502524545394, "compression/movement_sparsity/linear_layer_sparsity": 0.5811087367708515, "compression/movement_sparsity/model_sparsity": 0.561145863963198, "compression_loss": 77.4041748046875, "distillation_loss": 3.3894505500793457, "epoch": 1.35, "learning_rate": 4.038159642555247e-05, "loss": 80.7056, "step": 1593, "task_loss": 2.218897819519043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7206377146112397, "compression/movement_sparsity/importance_threshold": -0.001280281506990087, "compression/movement_sparsity/linear_layer_sparsity": 0.5824611520157758, "compression/movement_sparsity/model_sparsity": 0.5624518195839432, "compression_loss": 77.52032470703125, "distillation_loss": 2.31794810295105, "epoch": 1.35, "learning_rate": 4.037555850742664e-05, "loss": 81.1055, "step": 1594, "task_loss": 1.7292835712432861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7217191158036589, "compression/movement_sparsity/importance_threshold": -0.0012753255840874497, "compression/movement_sparsity/linear_layer_sparsity": 0.5838960467282374, "compression/movement_sparsity/model_sparsity": 0.5638374212487792, "compression_loss": 77.63619995117188, "distillation_loss": 2.945887804031372, "epoch": 1.35, "learning_rate": 4.036952058930081e-05, "loss": 80.9016, "step": 1595, "task_loss": 1.8880817890167236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7227977226793083, "compression/movement_sparsity/importance_threshold": -0.0012703824671800095, "compression/movement_sparsity/linear_layer_sparsity": 0.5853601437272391, "compression/movement_sparsity/model_sparsity": 0.5652512220117761, "compression_loss": 77.75167083740234, "distillation_loss": 1.8692739009857178, "epoch": 1.35, "learning_rate": 4.036348267117498e-05, "loss": 80.4524, "step": 1596, "task_loss": 0.971859335899353 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7238735388530865, "compression/movement_sparsity/importance_threshold": -0.0012654521397011506, "compression/movement_sparsity/linear_layer_sparsity": 0.5868439752236783, "compression/movement_sparsity/model_sparsity": 0.566684079331513, "compression_loss": 77.86689758300781, "distillation_loss": 1.7838380336761475, "epoch": 1.35, "learning_rate": 4.035744475304915e-05, "loss": 81.0564, "step": 1597, "task_loss": 1.5751001834869385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7249465679398925, "compression/movement_sparsity/importance_threshold": -0.0012605345850842538, "compression/movement_sparsity/linear_layer_sparsity": 0.5883139866098275, "compression/movement_sparsity/model_sparsity": 0.568103591304264, "compression_loss": 77.98182678222656, "distillation_loss": 4.349267959594727, "epoch": 1.35, "learning_rate": 4.035140683492332e-05, "loss": 80.9819, "step": 1598, "task_loss": 2.384502649307251 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7260168135546251, "compression/movement_sparsity/importance_threshold": -0.0012556297867627035, "compression/movement_sparsity/linear_layer_sparsity": 0.5898040067492697, "compression/movement_sparsity/model_sparsity": 0.5695424246680783, "compression_loss": 78.09646606445312, "distillation_loss": 3.583531141281128, "epoch": 1.35, "learning_rate": 4.034536891679749e-05, "loss": 81.385, "step": 1599, "task_loss": 3.078467845916748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7270842793121828, "compression/movement_sparsity/importance_threshold": -0.001250737728169882, "compression/movement_sparsity/linear_layer_sparsity": 0.5912714544393771, "compression/movement_sparsity/model_sparsity": 0.5709594610156334, "compression_loss": 78.2107925415039, "distillation_loss": 2.972083806991577, "epoch": 1.35, "learning_rate": 4.033933099867166e-05, "loss": 80.8402, "step": 1600, "task_loss": 1.2363975048065186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7281489688274649, "compression/movement_sparsity/importance_threshold": -0.0012458583927391705, "compression/movement_sparsity/linear_layer_sparsity": 0.5926716975206889, "compression/movement_sparsity/model_sparsity": 0.5723116014394506, "compression_loss": 78.32475280761719, "distillation_loss": 4.576032638549805, "epoch": 1.35, "learning_rate": 4.033329308054583e-05, "loss": 82.5732, "step": 1601, "task_loss": 2.170125722885132 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7292108857153698, "compression/movement_sparsity/importance_threshold": -0.001240991763903953, "compression/movement_sparsity/linear_layer_sparsity": 0.593923425094096, "compression/movement_sparsity/model_sparsity": 0.5735203283199467, "compression_loss": 78.43839263916016, "distillation_loss": 3.112761974334717, "epoch": 1.35, "learning_rate": 4.032725516242e-05, "loss": 81.8997, "step": 1602, "task_loss": 2.036247968673706 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7302700335907963, "compression/movement_sparsity/importance_threshold": -0.001236137825097613, "compression/movement_sparsity/linear_layer_sparsity": 0.595433263200149, "compression/movement_sparsity/model_sparsity": 0.5749782988422514, "compression_loss": 78.55184936523438, "distillation_loss": 3.61679744720459, "epoch": 1.35, "learning_rate": 4.032121724429417e-05, "loss": 81.8804, "step": 1603, "task_loss": 1.4147759675979614 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7313264160686435, "compression/movement_sparsity/importance_threshold": -0.0012312965597535315, "compression/movement_sparsity/linear_layer_sparsity": 0.5967239112567195, "compression/movement_sparsity/model_sparsity": 0.5762246091675809, "compression_loss": 78.66490936279297, "distillation_loss": 2.5168631076812744, "epoch": 1.36, "learning_rate": 4.031517932616834e-05, "loss": 82.4693, "step": 1604, "task_loss": 1.5032342672348022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.73238003676381, "compression/movement_sparsity/importance_threshold": -0.0012264679513050909, "compression/movement_sparsity/linear_layer_sparsity": 0.5981789697366533, "compression/movement_sparsity/model_sparsity": 0.5776296819124456, "compression_loss": 78.77769470214844, "distillation_loss": 2.7998342514038086, "epoch": 1.36, "learning_rate": 4.030914140804251e-05, "loss": 81.6353, "step": 1605, "task_loss": 1.7618314027786255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7334308992911944, "compression/movement_sparsity/importance_threshold": -0.0012216519831856763, "compression/movement_sparsity/linear_layer_sparsity": 0.599607997758638, "compression/movement_sparsity/model_sparsity": 0.5790096184256707, "compression_loss": 78.89022064208984, "distillation_loss": 2.930546522140503, "epoch": 1.36, "learning_rate": 4.030310348991668e-05, "loss": 82.2057, "step": 1606, "task_loss": 1.6712331771850586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7344790072656959, "compression/movement_sparsity/importance_threshold": -0.0012168486388286679, "compression/movement_sparsity/linear_layer_sparsity": 0.6008313219647365, "compression/movement_sparsity/model_sparsity": 0.5801909176819042, "compression_loss": 79.00244903564453, "distillation_loss": 4.532224655151367, "epoch": 1.36, "learning_rate": 4.0297065571790846e-05, "loss": 82.3143, "step": 1607, "task_loss": 2.9387285709381104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7355243643022131, "compression/movement_sparsity/importance_threshold": -0.001212057901667449, "compression/movement_sparsity/linear_layer_sparsity": 0.6022234923845587, "compression/movement_sparsity/model_sparsity": 0.5815352627649885, "compression_loss": 79.1143569946289, "distillation_loss": 4.550535202026367, "epoch": 1.36, "learning_rate": 4.029102765366502e-05, "loss": 82.6842, "step": 1608, "task_loss": 2.0873897075653076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7365669740156446, "compression/movement_sparsity/importance_threshold": -0.0012072797551354033, "compression/movement_sparsity/linear_layer_sparsity": 0.6036014849690621, "compression/movement_sparsity/model_sparsity": 0.582865917065013, "compression_loss": 79.22594451904297, "distillation_loss": 3.406613826751709, "epoch": 1.36, "learning_rate": 4.028498973553919e-05, "loss": 82.6393, "step": 1609, "task_loss": 1.810191035270691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7376068400208897, "compression/movement_sparsity/importance_threshold": -0.001202514182665912, "compression/movement_sparsity/linear_layer_sparsity": 0.6049322816980626, "compression/movement_sparsity/model_sparsity": 0.5841509968323627, "compression_loss": 79.33728790283203, "distillation_loss": 3.6580162048339844, "epoch": 1.36, "learning_rate": 4.0278951817413355e-05, "loss": 82.6521, "step": 1610, "task_loss": 1.5198159217834473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7386439659328468, "compression/movement_sparsity/importance_threshold": -0.0011977611676923586, "compression/movement_sparsity/linear_layer_sparsity": 0.6063314993009576, "compression/movement_sparsity/model_sparsity": 0.5855021470061015, "compression_loss": 79.4483413696289, "distillation_loss": 5.704624176025391, "epoch": 1.36, "learning_rate": 4.027291389928753e-05, "loss": 83.5544, "step": 1611, "task_loss": 4.130870819091797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7396783553664146, "compression/movement_sparsity/importance_threshold": -0.0011930206936481263, "compression/movement_sparsity/linear_layer_sparsity": 0.6076492152180616, "compression/movement_sparsity/model_sparsity": 0.5867745953276846, "compression_loss": 79.55906677246094, "distillation_loss": 3.7259910106658936, "epoch": 1.36, "learning_rate": 4.0266875981161696e-05, "loss": 82.8336, "step": 1612, "task_loss": 3.5618958473205566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7407100119364922, "compression/movement_sparsity/importance_threshold": -0.0011882927439665967, "compression/movement_sparsity/linear_layer_sparsity": 0.6090104066503659, "compression/movement_sparsity/model_sparsity": 0.5880890256467747, "compression_loss": 79.6695327758789, "distillation_loss": 2.902534008026123, "epoch": 1.36, "learning_rate": 4.026083806303586e-05, "loss": 82.7543, "step": 1613, "task_loss": 0.9674163460731506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7417389392579783, "compression/movement_sparsity/importance_threshold": -0.001183577302081153, "compression/movement_sparsity/linear_layer_sparsity": 0.6105461798210269, "compression/movement_sparsity/model_sparsity": 0.5895720402844326, "compression_loss": 79.77964782714844, "distillation_loss": 3.1617379188537598, "epoch": 1.36, "learning_rate": 4.025480014491004e-05, "loss": 82.9332, "step": 1614, "task_loss": 2.204493284225464 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7427651409457716, "compression/movement_sparsity/importance_threshold": -0.0011788743514251775, "compression/movement_sparsity/linear_layer_sparsity": 0.6118350034799491, "compression/movement_sparsity/model_sparsity": 0.5908165888857855, "compression_loss": 79.8895034790039, "distillation_loss": 3.248403310775757, "epoch": 1.36, "learning_rate": 4.024876222678421e-05, "loss": 82.7863, "step": 1615, "task_loss": 1.846352458000183 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7437886206147711, "compression/movement_sparsity/importance_threshold": -0.0011741838754320529, "compression/movement_sparsity/linear_layer_sparsity": 0.613301747644166, "compression/movement_sparsity/model_sparsity": 0.5922329458757287, "compression_loss": 79.99906158447266, "distillation_loss": 2.494969129562378, "epoch": 1.37, "learning_rate": 4.024272430865838e-05, "loss": 83.7716, "step": 1616, "task_loss": 1.5132652521133423 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7448093818798753, "compression/movement_sparsity/importance_threshold": -0.001169505857535163, "compression/movement_sparsity/linear_layer_sparsity": 0.6145532367342205, "compression/movement_sparsity/model_sparsity": 0.593441442465509, "compression_loss": 80.10831451416016, "distillation_loss": 4.804149627685547, "epoch": 1.37, "learning_rate": 4.0236686390532545e-05, "loss": 83.3611, "step": 1617, "task_loss": 2.373253583908081 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7458274283559834, "compression/movement_sparsity/importance_threshold": -0.0011648402811678888, "compression/movement_sparsity/linear_layer_sparsity": 0.6158639054682515, "compression/movement_sparsity/model_sparsity": 0.5947070856964375, "compression_loss": 80.2173080444336, "distillation_loss": 3.256453037261963, "epoch": 1.37, "learning_rate": 4.023064847240672e-05, "loss": 83.806, "step": 1618, "task_loss": 1.317393183708191 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7468427636579938, "compression/movement_sparsity/importance_threshold": -0.0011601871297636135, "compression/movement_sparsity/linear_layer_sparsity": 0.6172172388740839, "compression/movement_sparsity/model_sparsity": 0.5960139279364389, "compression_loss": 80.32591247558594, "distillation_loss": 6.651098251342773, "epoch": 1.37, "learning_rate": 4.0224610554280886e-05, "loss": 84.0133, "step": 1619, "task_loss": 3.309173107147217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7478553914008055, "compression/movement_sparsity/importance_threshold": -0.0011555463867557207, "compression/movement_sparsity/linear_layer_sparsity": 0.6185358729520958, "compression/movement_sparsity/model_sparsity": 0.5972872628772782, "compression_loss": 80.4343032836914, "distillation_loss": 2.786642074584961, "epoch": 1.37, "learning_rate": 4.0218572636155054e-05, "loss": 83.8614, "step": 1620, "task_loss": 0.6313796639442444 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7488653151993172, "compression/movement_sparsity/importance_threshold": -0.001150918035577592, "compression/movement_sparsity/linear_layer_sparsity": 0.6200135992747053, "compression/movement_sparsity/model_sparsity": 0.5987142247546883, "compression_loss": 80.54240417480469, "distillation_loss": 3.1132426261901855, "epoch": 1.37, "learning_rate": 4.021253471802923e-05, "loss": 83.958, "step": 1621, "task_loss": 2.0202646255493164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7498725386684278, "compression/movement_sparsity/importance_threshold": -0.0011463020596626104, "compression/movement_sparsity/linear_layer_sparsity": 0.621507661706976, "compression/movement_sparsity/model_sparsity": 0.6001569615461368, "compression_loss": 80.65015411376953, "distillation_loss": 3.6903536319732666, "epoch": 1.37, "learning_rate": 4.0206496799903395e-05, "loss": 84.0935, "step": 1622, "task_loss": 1.9884722232818604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.750877065423036, "compression/movement_sparsity/importance_threshold": -0.001141698442444159, "compression/movement_sparsity/linear_layer_sparsity": 0.6228701528735527, "compression/movement_sparsity/model_sparsity": 0.6014726469496284, "compression_loss": 80.75774383544922, "distillation_loss": 3.396599054336548, "epoch": 1.37, "learning_rate": 4.020045888177756e-05, "loss": 84.3468, "step": 1623, "task_loss": 2.6340548992156982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7518788990780407, "compression/movement_sparsity/importance_threshold": -0.0011371071673556199, "compression/movement_sparsity/linear_layer_sparsity": 0.624060506756138, "compression/movement_sparsity/model_sparsity": 0.6026221085143901, "compression_loss": 80.86492156982422, "distillation_loss": 4.441439628601074, "epoch": 1.37, "learning_rate": 4.0194420963651736e-05, "loss": 84.6491, "step": 1624, "task_loss": 2.801281690597534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7528780432483407, "compression/movement_sparsity/importance_threshold": -0.0011325282178303755, "compression/movement_sparsity/linear_layer_sparsity": 0.625469418707321, "compression/movement_sparsity/model_sparsity": 0.6039826200057299, "compression_loss": 80.9717788696289, "distillation_loss": 1.6084153652191162, "epoch": 1.37, "learning_rate": 4.018838304552591e-05, "loss": 83.8473, "step": 1625, "task_loss": 1.2388660907745361 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7538745015488346, "compression/movement_sparsity/importance_threshold": -0.0011279615773018095, "compression/movement_sparsity/linear_layer_sparsity": 0.6268814667145923, "compression/movement_sparsity/model_sparsity": 0.6053461598199836, "compression_loss": 81.07847595214844, "distillation_loss": 3.8735873699188232, "epoch": 1.37, "learning_rate": 4.018234512740007e-05, "loss": 84.5051, "step": 1626, "task_loss": 2.0619094371795654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7548682775944217, "compression/movement_sparsity/importance_threshold": -0.001123407229203303, "compression/movement_sparsity/linear_layer_sparsity": 0.6283527539866786, "compression/movement_sparsity/model_sparsity": 0.6067669038480646, "compression_loss": 81.18480682373047, "distillation_loss": 3.766946315765381, "epoch": 1.38, "learning_rate": 4.0176307209274244e-05, "loss": 84.8441, "step": 1627, "task_loss": 2.1531033515930176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.755859375, "compression/movement_sparsity/importance_threshold": -0.0011188651569682406, "compression/movement_sparsity/linear_layer_sparsity": 0.6298831016610652, "compression/movement_sparsity/model_sparsity": 0.6082446793719359, "compression_loss": 81.29084014892578, "distillation_loss": 2.5535950660705566, "epoch": 1.38, "learning_rate": 4.017026929114842e-05, "loss": 84.2531, "step": 1628, "task_loss": 1.2741971015930176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7568477973804688, "compression/movement_sparsity/importance_threshold": -0.0011143353440300039, "compression/movement_sparsity/linear_layer_sparsity": 0.6312720287072905, "compression/movement_sparsity/model_sparsity": 0.609585892501284, "compression_loss": 81.39664459228516, "distillation_loss": 2.8981800079345703, "epoch": 1.38, "learning_rate": 4.016423137302258e-05, "loss": 85.5777, "step": 1629, "task_loss": 1.4568125009536743 }, { "compression/movement_sparsity/importance_regularization_factor": 0.757833548350727, "compression/movement_sparsity/importance_threshold": -0.0011098177738219752, "compression/movement_sparsity/linear_layer_sparsity": 0.6325966010207851, "compression/movement_sparsity/model_sparsity": 0.6108649616809491, "compression_loss": 81.50209045410156, "distillation_loss": 4.95496129989624, "epoch": 1.38, "learning_rate": 4.015819345489675e-05, "loss": 85.1678, "step": 1630, "task_loss": 2.712144613265991 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7588166315256731, "compression/movement_sparsity/importance_threshold": -0.001105312429777538, "compression/movement_sparsity/linear_layer_sparsity": 0.6339634922052193, "compression/movement_sparsity/model_sparsity": 0.6121848959481488, "compression_loss": 81.60726165771484, "distillation_loss": 4.475948333740234, "epoch": 1.38, "learning_rate": 4.0152155536770927e-05, "loss": 85.3952, "step": 1631, "task_loss": 2.4647810459136963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7597970505202061, "compression/movement_sparsity/importance_threshold": -0.0011008192953300745, "compression/movement_sparsity/linear_layer_sparsity": 0.635287766414523, "compression/movement_sparsity/model_sparsity": 0.613463677264419, "compression_loss": 81.71216583251953, "distillation_loss": 3.3007102012634277, "epoch": 1.38, "learning_rate": 4.0146117618645094e-05, "loss": 85.0655, "step": 1632, "task_loss": 1.9918498992919922 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7607748089492248, "compression/movement_sparsity/importance_threshold": -0.0010963383539129672, "compression/movement_sparsity/linear_layer_sparsity": 0.6364469624470758, "compression/movement_sparsity/model_sparsity": 0.6145830513471494, "compression_loss": 81.81681060791016, "distillation_loss": 4.362125873565674, "epoch": 1.38, "learning_rate": 4.014007970051926e-05, "loss": 85.4317, "step": 1633, "task_loss": 1.9316996335983276 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7617499104276276, "compression/movement_sparsity/importance_threshold": -0.0010918695889596, "compression/movement_sparsity/linear_layer_sparsity": 0.6377940356648992, "compression/movement_sparsity/model_sparsity": 0.6158838484558588, "compression_loss": 81.92118835449219, "distillation_loss": 5.8607988357543945, "epoch": 1.38, "learning_rate": 4.0134041782393435e-05, "loss": 86.0713, "step": 1634, "task_loss": 2.5826332569122314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7627223585703139, "compression/movement_sparsity/importance_threshold": -0.0010874129839033536, "compression/movement_sparsity/linear_layer_sparsity": 0.639288980485575, "compression/movement_sparsity/model_sparsity": 0.6173274373229561, "compression_loss": 82.0252914428711, "distillation_loss": 3.0944275856018066, "epoch": 1.38, "learning_rate": 4.01280038642676e-05, "loss": 85.9963, "step": 1635, "task_loss": 2.175658702850342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7636921569921822, "compression/movement_sparsity/importance_threshold": -0.0010829685221776116, "compression/movement_sparsity/linear_layer_sparsity": 0.640445123931213, "compression/movement_sparsity/model_sparsity": 0.6184438636845232, "compression_loss": 82.12907409667969, "distillation_loss": 3.0649914741516113, "epoch": 1.38, "learning_rate": 4.012196594614177e-05, "loss": 86.0704, "step": 1636, "task_loss": 2.0274946689605713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7646593093081311, "compression/movement_sparsity/importance_threshold": -0.0010785361872157578, "compression/movement_sparsity/linear_layer_sparsity": 0.6416067405697958, "compression/movement_sparsity/model_sparsity": 0.6195655752180199, "compression_loss": 82.23262786865234, "distillation_loss": 3.0486879348754883, "epoch": 1.38, "learning_rate": 4.011592802801594e-05, "loss": 85.799, "step": 1637, "task_loss": 1.1070353984832764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7656238191330598, "compression/movement_sparsity/importance_threshold": -0.0010741159624511726, "compression/movement_sparsity/linear_layer_sparsity": 0.6427764894907064, "compression/movement_sparsity/model_sparsity": 0.6206951396649286, "compression_loss": 82.33580780029297, "distillation_loss": 2.4167075157165527, "epoch": 1.38, "learning_rate": 4.010989010989011e-05, "loss": 85.0675, "step": 1638, "task_loss": 1.843968391418457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7665856900818668, "compression/movement_sparsity/importance_threshold": -0.0010697078313172403, "compression/movement_sparsity/linear_layer_sparsity": 0.6440212771985494, "compression/movement_sparsity/model_sparsity": 0.6218971650855923, "compression_loss": 82.43873596191406, "distillation_loss": 4.907580852508545, "epoch": 1.39, "learning_rate": 4.010385219176428e-05, "loss": 85.9671, "step": 1639, "task_loss": 2.5753583908081055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7675449257694509, "compression/movement_sparsity/importance_threshold": -0.001065311777247344, "compression/movement_sparsity/linear_layer_sparsity": 0.64537007942068, "compression/movement_sparsity/model_sparsity": 0.6231996318019918, "compression_loss": 82.54131317138672, "distillation_loss": 3.715177059173584, "epoch": 1.39, "learning_rate": 4.009781427363845e-05, "loss": 86.3028, "step": 1640, "task_loss": 1.7788053750991821 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7685015298107112, "compression/movement_sparsity/importance_threshold": -0.0010609277836748643, "compression/movement_sparsity/linear_layer_sparsity": 0.6466402894539227, "compression/movement_sparsity/model_sparsity": 0.6244262062129695, "compression_loss": 82.64364624023438, "distillation_loss": 3.88571834564209, "epoch": 1.39, "learning_rate": 4.0091776355512625e-05, "loss": 86.524, "step": 1641, "task_loss": 2.0651512145996094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7694555058205462, "compression/movement_sparsity/importance_threshold": -0.0010565558340331857, "compression/movement_sparsity/linear_layer_sparsity": 0.6478877243269809, "compression/movement_sparsity/model_sparsity": 0.6256307878605796, "compression_loss": 82.7457046508789, "distillation_loss": 4.374055862426758, "epoch": 1.39, "learning_rate": 4.0085738437386786e-05, "loss": 86.617, "step": 1642, "task_loss": 2.068756580352783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7704068574138547, "compression/movement_sparsity/importance_threshold": -0.0010521959117556902, "compression/movement_sparsity/linear_layer_sparsity": 0.649177537691817, "compression/movement_sparsity/model_sparsity": 0.6268762921684035, "compression_loss": 82.84752655029297, "distillation_loss": 3.9567947387695312, "epoch": 1.39, "learning_rate": 4.007970051926096e-05, "loss": 87.3377, "step": 1643, "task_loss": 2.092651128768921 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7713555882055356, "compression/movement_sparsity/importance_threshold": -0.0010478480002757608, "compression/movement_sparsity/linear_layer_sparsity": 0.6504416306270624, "compression/movement_sparsity/model_sparsity": 0.6280969596225185, "compression_loss": 82.94898986816406, "distillation_loss": 3.515655994415283, "epoch": 1.39, "learning_rate": 4.0073662601135134e-05, "loss": 86.6531, "step": 1644, "task_loss": 1.6427217721939087 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7723017018104879, "compression/movement_sparsity/importance_threshold": -0.0010435120830267794, "compression/movement_sparsity/linear_layer_sparsity": 0.6515896417903727, "compression/movement_sparsity/model_sparsity": 0.6292055330706737, "compression_loss": 83.0501937866211, "distillation_loss": 3.703429937362671, "epoch": 1.39, "learning_rate": 4.00676246830093e-05, "loss": 86.6327, "step": 1645, "task_loss": 2.1823105812072754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7732452018436099, "compression/movement_sparsity/importance_threshold": -0.0010391881434421296, "compression/movement_sparsity/linear_layer_sparsity": 0.6527239520850695, "compression/movement_sparsity/model_sparsity": 0.630300876317201, "compression_loss": 83.1511459350586, "distillation_loss": 3.118286609649658, "epoch": 1.39, "learning_rate": 4.006158676488347e-05, "loss": 86.8482, "step": 1646, "task_loss": 1.610983967781067 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7741860919198009, "compression/movement_sparsity/importance_threshold": -0.0010348761649551924, "compression/movement_sparsity/linear_layer_sparsity": 0.6539181813221364, "compression/movement_sparsity/model_sparsity": 0.6314540801060958, "compression_loss": 83.2518081665039, "distillation_loss": 3.4543380737304688, "epoch": 1.39, "learning_rate": 4.005554884675764e-05, "loss": 87.1344, "step": 1647, "task_loss": 2.5398945808410645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7751243756539593, "compression/movement_sparsity/importance_threshold": -0.001030576130999353, "compression/movement_sparsity/linear_layer_sparsity": 0.6552114526955869, "compression/movement_sparsity/model_sparsity": 0.6327029236293001, "compression_loss": 83.35221099853516, "distillation_loss": 4.464435577392578, "epoch": 1.39, "learning_rate": 4.004951092863181e-05, "loss": 86.9493, "step": 1648, "task_loss": 2.8807921409606934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7760600566609841, "compression/movement_sparsity/importance_threshold": -0.0010262880250079924, "compression/movement_sparsity/linear_layer_sparsity": 0.6562733713685663, "compression/movement_sparsity/model_sparsity": 0.6337283621290187, "compression_loss": 83.45223999023438, "distillation_loss": 3.3357090950012207, "epoch": 1.39, "learning_rate": 4.0043473010505977e-05, "loss": 87.1625, "step": 1649, "task_loss": 2.6801722049713135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7769931385557741, "compression/movement_sparsity/importance_threshold": -0.0010220118304144934, "compression/movement_sparsity/linear_layer_sparsity": 0.6573376629509052, "compression/movement_sparsity/model_sparsity": 0.6347560920213605, "compression_loss": 83.55213928222656, "distillation_loss": 3.6553664207458496, "epoch": 1.39, "learning_rate": 4.003743509238015e-05, "loss": 87.6447, "step": 1650, "task_loss": 1.9374828338623047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.777923624953228, "compression/movement_sparsity/importance_threshold": -0.0010177475306522399, "compression/movement_sparsity/linear_layer_sparsity": 0.6584015014148741, "compression/movement_sparsity/model_sparsity": 0.6357833843613421, "compression_loss": 83.65162658691406, "distillation_loss": 3.8551483154296875, "epoch": 1.4, "learning_rate": 4.003139717425432e-05, "loss": 88.024, "step": 1651, "task_loss": 1.7644740343093872 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7788515194682447, "compression/movement_sparsity/importance_threshold": -0.0010134951091546125, "compression/movement_sparsity/linear_layer_sparsity": 0.6595108782750442, "compression/movement_sparsity/model_sparsity": 0.636854650713523, "compression_loss": 83.75086975097656, "distillation_loss": 3.384397506713867, "epoch": 1.4, "learning_rate": 4.0025359256128485e-05, "loss": 87.167, "step": 1652, "task_loss": 1.03892183303833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.779776825715723, "compression/movement_sparsity/importance_threshold": -0.0010092545493549949, "compression/movement_sparsity/linear_layer_sparsity": 0.6607438372085924, "compression/movement_sparsity/model_sparsity": 0.6380452537146785, "compression_loss": 83.8498764038086, "distillation_loss": 3.3967244625091553, "epoch": 1.4, "learning_rate": 4.001932133800266e-05, "loss": 87.8011, "step": 1653, "task_loss": 1.5546939373016357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7806995473105616, "compression/movement_sparsity/importance_threshold": -0.0010050258346867703, "compression/movement_sparsity/linear_layer_sparsity": 0.6619164717780708, "compression/movement_sparsity/model_sparsity": 0.6391776046792494, "compression_loss": 83.94854736328125, "distillation_loss": 4.817241191864014, "epoch": 1.4, "learning_rate": 4.0013283419876826e-05, "loss": 88.6585, "step": 1654, "task_loss": 2.9174211025238037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7816196878676593, "compression/movement_sparsity/importance_threshold": -0.0010008089485833206, "compression/movement_sparsity/linear_layer_sparsity": 0.6630537034938383, "compression/movement_sparsity/model_sparsity": 0.6402757689870464, "compression_loss": 84.04703521728516, "distillation_loss": 4.084846496582031, "epoch": 1.4, "learning_rate": 4.0007245501751e-05, "loss": 87.6796, "step": 1655, "task_loss": 1.6033703088760376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7825372510019151, "compression/movement_sparsity/importance_threshold": -0.0009966038744780286, "compression/movement_sparsity/linear_layer_sparsity": 0.6642824531962112, "compression/movement_sparsity/model_sparsity": 0.6414623073570664, "compression_loss": 84.14519500732422, "distillation_loss": 4.17720890045166, "epoch": 1.4, "learning_rate": 4.000120758362517e-05, "loss": 88.7166, "step": 1656, "task_loss": 1.5864524841308594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7834522403282274, "compression/movement_sparsity/importance_threshold": -0.0009924105958042779, "compression/movement_sparsity/linear_layer_sparsity": 0.6654281033743296, "compression/movement_sparsity/model_sparsity": 0.6425686009271343, "compression_loss": 84.24308776855469, "distillation_loss": 3.79018497467041, "epoch": 1.4, "learning_rate": 3.999516966549934e-05, "loss": 88.0096, "step": 1657, "task_loss": 2.107457399368286 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7843646594614954, "compression/movement_sparsity/importance_threshold": -0.0009882290959954492, "compression/movement_sparsity/linear_layer_sparsity": 0.6665536255574787, "compression/movement_sparsity/model_sparsity": 0.643655457960781, "compression_loss": 84.34066772460938, "distillation_loss": 3.8456313610076904, "epoch": 1.4, "learning_rate": 3.998913174737351e-05, "loss": 88.1788, "step": 1658, "task_loss": 1.6238396167755127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7852745120166177, "compression/movement_sparsity/importance_threshold": -0.0009840593584849269, "compression/movement_sparsity/linear_layer_sparsity": 0.6676983098780186, "compression/movement_sparsity/model_sparsity": 0.6447608188534495, "compression_loss": 84.43799591064453, "distillation_loss": 4.9036664962768555, "epoch": 1.4, "learning_rate": 3.9983093829247675e-05, "loss": 88.5799, "step": 1659, "task_loss": 1.743956208229065 }, { "compression/movement_sparsity/importance_regularization_factor": 0.786181801608493, "compression/movement_sparsity/importance_threshold": -0.0009799013667060935, "compression/movement_sparsity/linear_layer_sparsity": 0.6688200997966978, "compression/movement_sparsity/model_sparsity": 0.6458440718373926, "compression_loss": 84.53499603271484, "distillation_loss": 3.0764870643615723, "epoch": 1.4, "learning_rate": 3.997705591112185e-05, "loss": 87.7073, "step": 1660, "task_loss": 0.9585474729537964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7870865318520204, "compression/movement_sparsity/importance_threshold": -0.0009757551040923307, "compression/movement_sparsity/linear_layer_sparsity": 0.6698171867702409, "compression/movement_sparsity/model_sparsity": 0.6468069058059963, "compression_loss": 84.63175201416016, "distillation_loss": 3.292184829711914, "epoch": 1.4, "learning_rate": 3.9971017992996017e-05, "loss": 88.0221, "step": 1661, "task_loss": 2.6148476600646973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7879887063620985, "compression/movement_sparsity/importance_threshold": -0.000971620554077022, "compression/movement_sparsity/linear_layer_sparsity": 0.6708419974335376, "compression/movement_sparsity/model_sparsity": 0.6477965110703223, "compression_loss": 84.72817993164062, "distillation_loss": 5.365428924560547, "epoch": 1.4, "learning_rate": 3.9964980074870184e-05, "loss": 88.9951, "step": 1662, "task_loss": 2.539740562438965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.788888328753626, "compression/movement_sparsity/importance_threshold": -0.0009674977000935499, "compression/movement_sparsity/linear_layer_sparsity": 0.672099150503219, "compression/movement_sparsity/model_sparsity": 0.6490104770646049, "compression_loss": 84.82437133789062, "distillation_loss": 3.8918440341949463, "epoch": 1.41, "learning_rate": 3.995894215674436e-05, "loss": 89.0654, "step": 1663, "task_loss": 2.2313568592071533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7897854026415021, "compression/movement_sparsity/importance_threshold": -0.0009633865255752961, "compression/movement_sparsity/linear_layer_sparsity": 0.6733016074159547, "compression/movement_sparsity/model_sparsity": 0.650171625883198, "compression_loss": 84.92028045654297, "distillation_loss": 3.06754469871521, "epoch": 1.41, "learning_rate": 3.9952904238618525e-05, "loss": 88.563, "step": 1664, "task_loss": 1.8983546495437622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.790679931640625, "compression/movement_sparsity/importance_threshold": -0.0009592870139556453, "compression/movement_sparsity/linear_layer_sparsity": 0.6743543325556868, "compression/movement_sparsity/model_sparsity": 0.651188186675819, "compression_loss": 85.01600646972656, "distillation_loss": 3.95340633392334, "epoch": 1.41, "learning_rate": 3.99468663204927e-05, "loss": 88.5551, "step": 1665, "task_loss": 2.658905029296875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7915719193658939, "compression/movement_sparsity/importance_threshold": -0.0009551991486679789, "compression/movement_sparsity/linear_layer_sparsity": 0.6753772472763293, "compression/movement_sparsity/model_sparsity": 0.6521759611289537, "compression_loss": 85.11135864257812, "distillation_loss": 3.0069215297698975, "epoch": 1.41, "learning_rate": 3.9940828402366866e-05, "loss": 88.4403, "step": 1666, "task_loss": 1.9633991718292236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7924613694322076, "compression/movement_sparsity/importance_threshold": -0.0009511229131456789, "compression/movement_sparsity/linear_layer_sparsity": 0.6764208981244906, "compression/movement_sparsity/model_sparsity": 0.6531837593598351, "compression_loss": 85.2065200805664, "distillation_loss": 3.1621484756469727, "epoch": 1.41, "learning_rate": 3.993479048424103e-05, "loss": 88.9006, "step": 1667, "task_loss": 1.8997546434402466 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7933482854544647, "compression/movement_sparsity/importance_threshold": -0.0009470582908221296, "compression/movement_sparsity/linear_layer_sparsity": 0.6775160017560006, "compression/movement_sparsity/model_sparsity": 0.6542412428126699, "compression_loss": 85.3013916015625, "distillation_loss": 3.5697789192199707, "epoch": 1.41, "learning_rate": 3.992875256611521e-05, "loss": 88.5721, "step": 1668, "task_loss": 1.093035101890564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7942326710475641, "compression/movement_sparsity/importance_threshold": -0.0009430052651307123, "compression/movement_sparsity/linear_layer_sparsity": 0.6787470408985595, "compression/movement_sparsity/model_sparsity": 0.6554299919735626, "compression_loss": 85.39590454101562, "distillation_loss": 3.5908279418945312, "epoch": 1.41, "learning_rate": 3.9922714647989374e-05, "loss": 89.0643, "step": 1669, "task_loss": 1.2222788333892822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7951145298264047, "compression/movement_sparsity/importance_threshold": -0.0009389638195048096, "compression/movement_sparsity/linear_layer_sparsity": 0.6798849165193793, "compression/movement_sparsity/model_sparsity": 0.6565287780662924, "compression_loss": 85.49022674560547, "distillation_loss": 4.516538619995117, "epoch": 1.41, "learning_rate": 3.991667672986354e-05, "loss": 89.531, "step": 1670, "task_loss": 2.5734446048736572 }, { "compression/movement_sparsity/importance_regularization_factor": 0.795993865405885, "compression/movement_sparsity/importance_threshold": -0.0009349339373778062, "compression/movement_sparsity/linear_layer_sparsity": 0.6813025808096072, "compression/movement_sparsity/model_sparsity": 0.6578977412269054, "compression_loss": 85.58421325683594, "distillation_loss": 4.479083061218262, "epoch": 1.41, "learning_rate": 3.9910638811737716e-05, "loss": 88.9979, "step": 1671, "task_loss": 3.063530445098877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7968706814009042, "compression/movement_sparsity/importance_threshold": -0.0009309156021830821, "compression/movement_sparsity/linear_layer_sparsity": 0.6825175342500251, "compression/movement_sparsity/model_sparsity": 0.6590709572790111, "compression_loss": 85.67796325683594, "distillation_loss": 4.074405193328857, "epoch": 1.41, "learning_rate": 3.990460089361188e-05, "loss": 89.515, "step": 1672, "task_loss": 1.4055789709091187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7977449814263609, "compression/movement_sparsity/importance_threshold": -0.0009269087973540215, "compression/movement_sparsity/linear_layer_sparsity": 0.6836758478941729, "compression/movement_sparsity/model_sparsity": 0.6601894792860928, "compression_loss": 85.77140808105469, "distillation_loss": 4.347858428955078, "epoch": 1.41, "learning_rate": 3.989856297548606e-05, "loss": 90.0719, "step": 1673, "task_loss": 2.27360200881958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7986167690971536, "compression/movement_sparsity/importance_threshold": -0.0009229135063240075, "compression/movement_sparsity/linear_layer_sparsity": 0.6848866517242537, "compression/movement_sparsity/model_sparsity": 0.6613586882797421, "compression_loss": 85.86457824707031, "distillation_loss": 4.021519660949707, "epoch": 1.41, "learning_rate": 3.9892525057360224e-05, "loss": 89.6945, "step": 1674, "task_loss": 1.613802433013916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7994860480281816, "compression/movement_sparsity/importance_threshold": -0.0009189297125264217, "compression/movement_sparsity/linear_layer_sparsity": 0.6860108980214658, "compression/movement_sparsity/model_sparsity": 0.6624443132580587, "compression_loss": 85.95750427246094, "distillation_loss": 5.3593292236328125, "epoch": 1.42, "learning_rate": 3.98864871392344e-05, "loss": 89.7843, "step": 1675, "task_loss": 3.014176368713379 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8003528218343435, "compression/movement_sparsity/importance_threshold": -0.0009149573993946459, "compression/movement_sparsity/linear_layer_sparsity": 0.6871147659161881, "compression/movement_sparsity/model_sparsity": 0.6635102598947026, "compression_loss": 86.0501937866211, "distillation_loss": 2.9685311317443848, "epoch": 1.42, "learning_rate": 3.9880449221108565e-05, "loss": 90.0566, "step": 1676, "task_loss": 1.6767326593399048 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8012170941305379, "compression/movement_sparsity/importance_threshold": -0.0009109965503620657, "compression/movement_sparsity/linear_layer_sparsity": 0.6883539254169071, "compression/movement_sparsity/model_sparsity": 0.6647068504544712, "compression_loss": 86.142578125, "distillation_loss": 6.478410720825195, "epoch": 1.42, "learning_rate": 3.987441130298273e-05, "loss": 91.1034, "step": 1677, "task_loss": 2.6334481239318848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.802078868531664, "compression/movement_sparsity/importance_threshold": -0.0009070471488620609, "compression/movement_sparsity/linear_layer_sparsity": 0.6893241949374372, "compression/movement_sparsity/model_sparsity": 0.665643788232073, "compression_loss": 86.23472595214844, "distillation_loss": 6.091056823730469, "epoch": 1.42, "learning_rate": 3.9868373384856906e-05, "loss": 90.7124, "step": 1678, "task_loss": 3.2254257202148438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8029381486526203, "compression/movement_sparsity/importance_threshold": -0.0009031091783280156, "compression/movement_sparsity/linear_layer_sparsity": 0.6904316162341151, "compression/movement_sparsity/model_sparsity": 0.6667131662003836, "compression_loss": 86.32658386230469, "distillation_loss": 3.6806795597076416, "epoch": 1.42, "learning_rate": 3.986233546673107e-05, "loss": 90.2452, "step": 1679, "task_loss": 2.302868366241455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8037949381083055, "compression/movement_sparsity/importance_threshold": -0.0008991826221933122, "compression/movement_sparsity/linear_layer_sparsity": 0.6914096960844468, "compression/movement_sparsity/model_sparsity": 0.6676576459989308, "compression_loss": 86.41810607910156, "distillation_loss": 4.773200035095215, "epoch": 1.42, "learning_rate": 3.985629754860524e-05, "loss": 90.3195, "step": 1680, "task_loss": 3.1848840713500977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8046492405136186, "compression/movement_sparsity/importance_threshold": -0.0008952674638913337, "compression/movement_sparsity/linear_layer_sparsity": 0.6924123993409464, "compression/movement_sparsity/model_sparsity": 0.6686259033138937, "compression_loss": 86.50947570800781, "distillation_loss": 6.736716270446777, "epoch": 1.42, "learning_rate": 3.9850259630479414e-05, "loss": 91.4756, "step": 1681, "task_loss": 3.942882776260376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8055010594834585, "compression/movement_sparsity/importance_threshold": -0.0008913636868554614, "compression/movement_sparsity/linear_layer_sparsity": 0.6933579142894646, "compression/movement_sparsity/model_sparsity": 0.6695389369151861, "compression_loss": 86.60055541992188, "distillation_loss": 6.376866817474365, "epoch": 1.42, "learning_rate": 3.984422171235358e-05, "loss": 91.7409, "step": 1682, "task_loss": 2.4286000728607178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8063503986327237, "compression/movement_sparsity/importance_threshold": -0.0008874712745190801, "compression/movement_sparsity/linear_layer_sparsity": 0.6944516347175289, "compression/movement_sparsity/model_sparsity": 0.6705950846818688, "compression_loss": 86.69133758544922, "distillation_loss": 4.11944055557251, "epoch": 1.42, "learning_rate": 3.983818379422775e-05, "loss": 90.7546, "step": 1683, "task_loss": 1.6835241317749023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8071972615763134, "compression/movement_sparsity/importance_threshold": -0.0008835902103155702, "compression/movement_sparsity/linear_layer_sparsity": 0.6954444170665556, "compression/movement_sparsity/model_sparsity": 0.6715537619030506, "compression_loss": 86.78190612792969, "distillation_loss": 4.0586137771606445, "epoch": 1.42, "learning_rate": 3.983214587610192e-05, "loss": 90.1239, "step": 1684, "task_loss": 1.9602917432785034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8080416519291259, "compression/movement_sparsity/importance_threshold": -0.0008797204776783162, "compression/movement_sparsity/linear_layer_sparsity": 0.6965035931809788, "compression/movement_sparsity/model_sparsity": 0.6725765520595365, "compression_loss": 86.87214660644531, "distillation_loss": 4.846198081970215, "epoch": 1.42, "learning_rate": 3.98261079579761e-05, "loss": 91.7024, "step": 1685, "task_loss": 2.139176368713379 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8088835733060604, "compression/movement_sparsity/importance_threshold": -0.0008758620600406997, "compression/movement_sparsity/linear_layer_sparsity": 0.6974791451077715, "compression/movement_sparsity/model_sparsity": 0.6735185907764952, "compression_loss": 86.96217346191406, "distillation_loss": 4.634245872497559, "epoch": 1.42, "learning_rate": 3.982007003985026e-05, "loss": 90.9322, "step": 1686, "task_loss": 4.0168375968933105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8097230293220155, "compression/movement_sparsity/importance_threshold": -0.0008720149408361043, "compression/movement_sparsity/linear_layer_sparsity": 0.6984227283411327, "compression/movement_sparsity/model_sparsity": 0.6744297590229889, "compression_loss": 87.05192565917969, "distillation_loss": 3.3765082359313965, "epoch": 1.43, "learning_rate": 3.981403212172443e-05, "loss": 90.8058, "step": 1687, "task_loss": 2.087331533432007 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8105600235918899, "compression/movement_sparsity/importance_threshold": -0.0008681791034979118, "compression/movement_sparsity/linear_layer_sparsity": 0.6996021477619959, "compression/movement_sparsity/model_sparsity": 0.6755686617584269, "compression_loss": 87.14132690429688, "distillation_loss": 3.8782551288604736, "epoch": 1.43, "learning_rate": 3.9807994203598605e-05, "loss": 91.3368, "step": 1688, "task_loss": 2.0562119483947754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8113945597305827, "compression/movement_sparsity/importance_threshold": -0.0008643545314595048, "compression/movement_sparsity/linear_layer_sparsity": 0.7006645076291779, "compression/movement_sparsity/model_sparsity": 0.67659452629597, "compression_loss": 87.23048400878906, "distillation_loss": 4.624545097351074, "epoch": 1.43, "learning_rate": 3.980195628547277e-05, "loss": 91.2287, "step": 1689, "task_loss": 1.681640386581421 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8122266413529925, "compression/movement_sparsity/importance_threshold": -0.0008605412081542666, "compression/movement_sparsity/linear_layer_sparsity": 0.701544105779005, "compression/movement_sparsity/model_sparsity": 0.67744390754339, "compression_loss": 87.31941986083984, "distillation_loss": 3.293827772140503, "epoch": 1.43, "learning_rate": 3.979591836734694e-05, "loss": 91.7155, "step": 1690, "task_loss": 2.327915668487549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.813056272074018, "compression/movement_sparsity/importance_threshold": -0.0008567391170155804, "compression/movement_sparsity/linear_layer_sparsity": 0.702651062033145, "compression/movement_sparsity/model_sparsity": 0.6785128364448046, "compression_loss": 87.4080581665039, "distillation_loss": 4.4201812744140625, "epoch": 1.43, "learning_rate": 3.9789880449221113e-05, "loss": 91.5328, "step": 1691, "task_loss": 2.2571511268615723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8138834555085582, "compression/movement_sparsity/importance_threshold": -0.0008529482414768273, "compression/movement_sparsity/linear_layer_sparsity": 0.7036429262212637, "compression/movement_sparsity/model_sparsity": 0.6794706270467303, "compression_loss": 87.4964370727539, "distillation_loss": 4.710515022277832, "epoch": 1.43, "learning_rate": 3.978384253109528e-05, "loss": 91.1092, "step": 1692, "task_loss": 2.5657031536102295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8147081952715118, "compression/movement_sparsity/importance_threshold": -0.0008491685649713908, "compression/movement_sparsity/linear_layer_sparsity": 0.7045680508431245, "compression/movement_sparsity/model_sparsity": 0.680363970791814, "compression_loss": 87.58454895019531, "distillation_loss": 7.353784084320068, "epoch": 1.43, "learning_rate": 3.977780461296945e-05, "loss": 92.1664, "step": 1693, "task_loss": 3.8329851627349854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8155304949777777, "compression/movement_sparsity/importance_threshold": -0.0008454000709326535, "compression/movement_sparsity/linear_layer_sparsity": 0.7054436067001231, "compression/movement_sparsity/model_sparsity": 0.6812094486115997, "compression_loss": 87.67240142822266, "distillation_loss": 3.1483664512634277, "epoch": 1.43, "learning_rate": 3.977176669484362e-05, "loss": 91.5349, "step": 1694, "task_loss": 0.3589246869087219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8163503582422545, "compression/movement_sparsity/importance_threshold": -0.0008416427427939983, "compression/movement_sparsity/linear_layer_sparsity": 0.7064532498221868, "compression/movement_sparsity/model_sparsity": 0.6821844073863951, "compression_loss": 87.76002502441406, "distillation_loss": 4.082873344421387, "epoch": 1.43, "learning_rate": 3.976572877671779e-05, "loss": 92.0415, "step": 1695, "task_loss": 1.9781314134597778 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8171677886798413, "compression/movement_sparsity/importance_threshold": -0.0008378965639888073, "compression/movement_sparsity/linear_layer_sparsity": 0.7074029501535452, "compression/movement_sparsity/model_sparsity": 0.6831014825897513, "compression_loss": 87.84733581542969, "distillation_loss": 5.187838077545166, "epoch": 1.43, "learning_rate": 3.9759690858591956e-05, "loss": 91.8389, "step": 1696, "task_loss": 2.631863594055176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8179827899054365, "compression/movement_sparsity/importance_threshold": -0.0008341615179504641, "compression/movement_sparsity/linear_layer_sparsity": 0.7084458736274806, "compression/movement_sparsity/model_sparsity": 0.6841085784339492, "compression_loss": 87.93437957763672, "distillation_loss": 4.375911235809326, "epoch": 1.43, "learning_rate": 3.975365294046613e-05, "loss": 91.9812, "step": 1697, "task_loss": 3.066697359085083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8187953655339393, "compression/movement_sparsity/importance_threshold": -0.0008304375881123499, "compression/movement_sparsity/linear_layer_sparsity": 0.7095419311924015, "compression/movement_sparsity/model_sparsity": 0.6851669830496476, "compression_loss": 88.02117919921875, "distillation_loss": 4.879804611206055, "epoch": 1.44, "learning_rate": 3.9747615022340304e-05, "loss": 92.2967, "step": 1698, "task_loss": 3.4494991302490234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8196055191802482, "compression/movement_sparsity/importance_threshold": -0.0008267247579078489, "compression/movement_sparsity/linear_layer_sparsity": 0.7106569124113605, "compression/movement_sparsity/model_sparsity": 0.6862436612336519, "compression_loss": 88.10771942138672, "distillation_loss": 4.804948806762695, "epoch": 1.44, "learning_rate": 3.9741577104214464e-05, "loss": 92.2535, "step": 1699, "task_loss": 3.2002995014190674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8204132544592619, "compression/movement_sparsity/importance_threshold": -0.0008230230107703434, "compression/movement_sparsity/linear_layer_sparsity": 0.7116843106190341, "compression/movement_sparsity/model_sparsity": 0.6872357651522454, "compression_loss": 88.19398498535156, "distillation_loss": 5.048990249633789, "epoch": 1.44, "learning_rate": 3.973553918608864e-05, "loss": 93.472, "step": 1700, "task_loss": 2.2576708793640137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8212185749858798, "compression/movement_sparsity/importance_threshold": -0.0008193323301332145, "compression/movement_sparsity/linear_layer_sparsity": 0.7125596637651828, "compression/movement_sparsity/model_sparsity": 0.6880810472249226, "compression_loss": 88.280029296875, "distillation_loss": 4.423113822937012, "epoch": 1.44, "learning_rate": 3.972950126796281e-05, "loss": 92.9265, "step": 1701, "task_loss": 2.778592824935913 }, { "compression/movement_sparsity/importance_regularization_factor": 0.822021484375, "compression/movement_sparsity/importance_threshold": -0.0008156526994298474, "compression/movement_sparsity/linear_layer_sparsity": 0.7135603399131845, "compression/movement_sparsity/model_sparsity": 0.6890473470688004, "compression_loss": 88.36579132080078, "distillation_loss": 3.08911395072937, "epoch": 1.44, "learning_rate": 3.972346334983697e-05, "loss": 91.8567, "step": 1702, "task_loss": 1.2686666250228882 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8228219862415216, "compression/movement_sparsity/importance_threshold": -0.0008119841020936235, "compression/movement_sparsity/linear_layer_sparsity": 0.7145138082815157, "compression/movement_sparsity/model_sparsity": 0.6899680608654677, "compression_loss": 88.45133972167969, "distillation_loss": 5.24107551574707, "epoch": 1.44, "learning_rate": 3.971742543171115e-05, "loss": 93.0667, "step": 1703, "task_loss": 2.4169652462005615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8236200842003436, "compression/movement_sparsity/importance_threshold": -0.0008083265215579243, "compression/movement_sparsity/linear_layer_sparsity": 0.7155342785477928, "compression/movement_sparsity/model_sparsity": 0.6909534748387646, "compression_loss": 88.53665924072266, "distillation_loss": 3.1375508308410645, "epoch": 1.44, "learning_rate": 3.971138751358532e-05, "loss": 92.1355, "step": 1704, "task_loss": 2.1223485469818115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8244157818663643, "compression/movement_sparsity/importance_threshold": -0.0008046799412561351, "compression/movement_sparsity/linear_layer_sparsity": 0.7163931167217659, "compression/movement_sparsity/model_sparsity": 0.6917828092793663, "compression_loss": 88.62168884277344, "distillation_loss": 4.261159896850586, "epoch": 1.44, "learning_rate": 3.970534959545949e-05, "loss": 92.6426, "step": 1705, "task_loss": 2.6342337131500244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8252090828544829, "compression/movement_sparsity/importance_threshold": -0.0008010443446216367, "compression/movement_sparsity/linear_layer_sparsity": 0.7172605045239339, "compression/movement_sparsity/model_sparsity": 0.6926203996421328, "compression_loss": 88.7064208984375, "distillation_loss": 3.0820817947387695, "epoch": 1.44, "learning_rate": 3.9699311677333655e-05, "loss": 92.48, "step": 1706, "task_loss": 2.019920825958252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8259999907795982, "compression/movement_sparsity/importance_threshold": -0.0007974197150878112, "compression/movement_sparsity/linear_layer_sparsity": 0.7182423047145678, "compression/movement_sparsity/model_sparsity": 0.6935684719758479, "compression_loss": 88.79090118408203, "distillation_loss": 4.3953046798706055, "epoch": 1.44, "learning_rate": 3.969327375920783e-05, "loss": 92.1032, "step": 1707, "task_loss": 2.0458595752716064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8267885092566086, "compression/movement_sparsity/importance_threshold": -0.0007938060360880438, "compression/movement_sparsity/linear_layer_sparsity": 0.7194007137520567, "compression/movement_sparsity/model_sparsity": 0.694687086099216, "compression_loss": 88.87515258789062, "distillation_loss": 5.828773498535156, "epoch": 1.44, "learning_rate": 3.9687235841081996e-05, "loss": 93.007, "step": 1708, "task_loss": 2.769258737564087 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8275746419004134, "compression/movement_sparsity/importance_threshold": -0.0007902032910557144, "compression/movement_sparsity/linear_layer_sparsity": 0.7203048160663754, "compression/movement_sparsity/model_sparsity": 0.6955601297176939, "compression_loss": 88.95913696289062, "distillation_loss": 4.946264743804932, "epoch": 1.44, "learning_rate": 3.968119792295616e-05, "loss": 92.9347, "step": 1709, "task_loss": 2.4317002296447754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8283583923259111, "compression/movement_sparsity/importance_threshold": -0.0007866114634242074, "compression/movement_sparsity/linear_layer_sparsity": 0.721423481853134, "compression/movement_sparsity/model_sparsity": 0.6966403658932587, "compression_loss": 89.0428466796875, "distillation_loss": 4.539848804473877, "epoch": 1.45, "learning_rate": 3.967516000483034e-05, "loss": 93.2585, "step": 1710, "task_loss": 2.2182223796844482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8291397641480005, "compression/movement_sparsity/importance_threshold": -0.0007830305366269053, "compression/movement_sparsity/linear_layer_sparsity": 0.7223996657608115, "compression/movement_sparsity/model_sparsity": 0.6975830148806146, "compression_loss": 89.12628936767578, "distillation_loss": 7.0890913009643555, "epoch": 1.45, "learning_rate": 3.9669122086704504e-05, "loss": 94.0418, "step": 1711, "task_loss": 3.6528878211975098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8299187609815806, "compression/movement_sparsity/importance_threshold": -0.0007794604940971899, "compression/movement_sparsity/linear_layer_sparsity": 0.7234015939464149, "compression/movement_sparsity/model_sparsity": 0.6985505237507509, "compression_loss": 89.20943450927734, "distillation_loss": 5.00452995300293, "epoch": 1.45, "learning_rate": 3.966308416857867e-05, "loss": 93.3753, "step": 1712, "task_loss": 2.7589199542999268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8306953864415499, "compression/movement_sparsity/importance_threshold": -0.0007759013192684444, "compression/movement_sparsity/linear_layer_sparsity": 0.7242709134637398, "compression/movement_sparsity/model_sparsity": 0.699389979468316, "compression_loss": 89.29234313964844, "distillation_loss": 5.70991325378418, "epoch": 1.45, "learning_rate": 3.9657046250452846e-05, "loss": 94.0832, "step": 1713, "task_loss": 3.1055397987365723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8314696441428073, "compression/movement_sparsity/importance_threshold": -0.000772352995574052, "compression/movement_sparsity/linear_layer_sparsity": 0.7251156572715672, "compression/movement_sparsity/model_sparsity": 0.7002057037276087, "compression_loss": 89.37500762939453, "distillation_loss": 3.8548972606658936, "epoch": 1.45, "learning_rate": 3.965100833232702e-05, "loss": 93.6272, "step": 1714, "task_loss": 2.6600823402404785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8322415377002519, "compression/movement_sparsity/importance_threshold": -0.0007688155064473944, "compression/movement_sparsity/linear_layer_sparsity": 0.7260042462437918, "compression/movement_sparsity/model_sparsity": 0.7010637669350179, "compression_loss": 89.45740509033203, "distillation_loss": 5.214330673217773, "epoch": 1.45, "learning_rate": 3.964497041420119e-05, "loss": 93.7199, "step": 1715, "task_loss": 2.6424620151519775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8330110707287821, "compression/movement_sparsity/importance_threshold": -0.0007652888353218545, "compression/movement_sparsity/linear_layer_sparsity": 0.7269540777409941, "compression/movement_sparsity/model_sparsity": 0.7019809687982679, "compression_loss": 89.53958129882812, "distillation_loss": 4.565717697143555, "epoch": 1.45, "learning_rate": 3.9638932496075354e-05, "loss": 94.6576, "step": 1716, "task_loss": 1.9385414123535156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8337782468432968, "compression/movement_sparsity/importance_threshold": -0.0007617729656308157, "compression/movement_sparsity/linear_layer_sparsity": 0.7277898188022565, "compression/movement_sparsity/model_sparsity": 0.7027879995830356, "compression_loss": 89.62149047851562, "distillation_loss": 4.476479530334473, "epoch": 1.45, "learning_rate": 3.963289457794953e-05, "loss": 94.349, "step": 1717, "task_loss": 2.72652530670166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.834543069658695, "compression/movement_sparsity/importance_threshold": -0.0007582678808076594, "compression/movement_sparsity/linear_layer_sparsity": 0.7286424921815617, "compression/movement_sparsity/model_sparsity": 0.7036113810086316, "compression_loss": 89.70315551757812, "distillation_loss": 4.340365409851074, "epoch": 1.45, "learning_rate": 3.9626856659823695e-05, "loss": 93.7463, "step": 1718, "task_loss": 3.3404006958007812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8353055427898752, "compression/movement_sparsity/importance_threshold": -0.0007547735642857694, "compression/movement_sparsity/linear_layer_sparsity": 0.729567306775064, "compression/movement_sparsity/model_sparsity": 0.7045044253757846, "compression_loss": 89.78459930419922, "distillation_loss": 6.088420391082764, "epoch": 1.45, "learning_rate": 3.962081874169786e-05, "loss": 95.1147, "step": 1719, "task_loss": 3.1669814586639404 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8360656698517362, "compression/movement_sparsity/importance_threshold": -0.0007512899994985287, "compression/movement_sparsity/linear_layer_sparsity": 0.7304980834523842, "compression/movement_sparsity/model_sparsity": 0.7054032270108349, "compression_loss": 89.86572265625, "distillation_loss": 4.3518218994140625, "epoch": 1.45, "learning_rate": 3.9614780823572036e-05, "loss": 94.4191, "step": 1720, "task_loss": 2.6633100509643555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8368234544591773, "compression/movement_sparsity/importance_threshold": -0.0007478171698793181, "compression/movement_sparsity/linear_layer_sparsity": 0.7312944866846158, "compression/movement_sparsity/model_sparsity": 0.7061722713420165, "compression_loss": 89.94664001464844, "distillation_loss": 4.066655158996582, "epoch": 1.45, "learning_rate": 3.9608742905446203e-05, "loss": 94.3154, "step": 1721, "task_loss": 2.176384449005127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8375789002270966, "compression/movement_sparsity/importance_threshold": -0.0007443550588615224, "compression/movement_sparsity/linear_layer_sparsity": 0.7322621448124338, "compression/movement_sparsity/model_sparsity": 0.7071066874362792, "compression_loss": 90.02726745605469, "distillation_loss": 3.522634744644165, "epoch": 1.46, "learning_rate": 3.960270498732037e-05, "loss": 94.6866, "step": 1722, "task_loss": 2.328641891479492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8383320107703933, "compression/movement_sparsity/importance_threshold": -0.0007409036498785227, "compression/movement_sparsity/linear_layer_sparsity": 0.7331719707272177, "compression/movement_sparsity/model_sparsity": 0.7079852580319386, "compression_loss": 90.10763549804688, "distillation_loss": 5.2428154945373535, "epoch": 1.46, "learning_rate": 3.9596667069194545e-05, "loss": 94.2867, "step": 1723, "task_loss": 2.2871320247650146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8390827897039662, "compression/movement_sparsity/importance_threshold": -0.0007374629263637023, "compression/movement_sparsity/linear_layer_sparsity": 0.7340606312444481, "compression/movement_sparsity/model_sparsity": 0.7088433903265625, "compression_loss": 90.18778228759766, "distillation_loss": 6.001062393188477, "epoch": 1.46, "learning_rate": 3.959062915106871e-05, "loss": 95.2397, "step": 1724, "task_loss": 2.6104485988616943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.839831240642714, "compression/movement_sparsity/importance_threshold": -0.0007340328717504439, "compression/movement_sparsity/linear_layer_sparsity": 0.7348701152885763, "compression/movement_sparsity/model_sparsity": 0.7096250661035107, "compression_loss": 90.26757049560547, "distillation_loss": 3.804311752319336, "epoch": 1.46, "learning_rate": 3.9584591232942886e-05, "loss": 94.4792, "step": 1725, "task_loss": 2.3821144104003906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8405773672015354, "compression/movement_sparsity/importance_threshold": -0.0007306134694721302, "compression/movement_sparsity/linear_layer_sparsity": 0.7358785302213736, "compression/movement_sparsity/model_sparsity": 0.7105988388811192, "compression_loss": 90.34716796875, "distillation_loss": 3.976909875869751, "epoch": 1.46, "learning_rate": 3.957855331481705e-05, "loss": 94.6397, "step": 1726, "task_loss": 2.1818578243255615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8413211729953295, "compression/movement_sparsity/importance_threshold": -0.0007272047029621433, "compression/movement_sparsity/linear_layer_sparsity": 0.7367668091652396, "compression/movement_sparsity/model_sparsity": 0.7114566027105977, "compression_loss": 90.42649841308594, "distillation_loss": 5.444144248962402, "epoch": 1.46, "learning_rate": 3.957251539669122e-05, "loss": 95.8259, "step": 1727, "task_loss": 2.3966407775878906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8420626616389947, "compression/movement_sparsity/importance_threshold": -0.0007238065556538667, "compression/movement_sparsity/linear_layer_sparsity": 0.7376604897570446, "compression/movement_sparsity/model_sparsity": 0.7123195826247912, "compression_loss": 90.50566864013672, "distillation_loss": 6.528076171875, "epoch": 1.46, "learning_rate": 3.9566477478565394e-05, "loss": 95.8898, "step": 1728, "task_loss": 2.8657712936401367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8428018367474301, "compression/movement_sparsity/importance_threshold": -0.0007204190109806824, "compression/movement_sparsity/linear_layer_sparsity": 0.7384727879047349, "compression/movement_sparsity/model_sparsity": 0.7131039758321869, "compression_loss": 90.58452606201172, "distillation_loss": 5.820781707763672, "epoch": 1.46, "learning_rate": 3.956043956043956e-05, "loss": 95.144, "step": 1729, "task_loss": 2.8982434272766113 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8435387019355345, "compression/movement_sparsity/importance_threshold": -0.0007170420523759736, "compression/movement_sparsity/linear_layer_sparsity": 0.7392945896140309, "compression/movement_sparsity/model_sparsity": 0.7138975461246109, "compression_loss": 90.66312408447266, "distillation_loss": 5.954009056091309, "epoch": 1.46, "learning_rate": 3.9554401642313735e-05, "loss": 94.9069, "step": 1730, "task_loss": 3.2563514709472656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8442732608182064, "compression/movement_sparsity/importance_threshold": -0.0007136756632731226, "compression/movement_sparsity/linear_layer_sparsity": 0.740116355550824, "compression/movement_sparsity/model_sparsity": 0.7146910818734274, "compression_loss": 90.74149322509766, "distillation_loss": 5.817266464233398, "epoch": 1.46, "learning_rate": 3.95483637241879e-05, "loss": 95.8474, "step": 1731, "task_loss": 2.7089242935180664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8450055170103449, "compression/movement_sparsity/importance_threshold": -0.0007103198271055126, "compression/movement_sparsity/linear_layer_sparsity": 0.7409513096170225, "compression/movement_sparsity/model_sparsity": 0.7154973526988327, "compression_loss": 90.819580078125, "distillation_loss": 4.9026994705200195, "epoch": 1.46, "learning_rate": 3.954232580606207e-05, "loss": 95.6568, "step": 1732, "task_loss": 3.7303638458251953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8457354741268487, "compression/movement_sparsity/importance_threshold": -0.0007069745273065252, "compression/movement_sparsity/linear_layer_sparsity": 0.741876768115577, "compression/movement_sparsity/model_sparsity": 0.7163910188509186, "compression_loss": 90.89744567871094, "distillation_loss": 4.284668922424316, "epoch": 1.46, "learning_rate": 3.9536287887936244e-05, "loss": 95.1398, "step": 1733, "task_loss": 1.7851847410202026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8464631357826164, "compression/movement_sparsity/importance_threshold": -0.0007036397473095448, "compression/movement_sparsity/linear_layer_sparsity": 0.7427895035272641, "compression/movement_sparsity/model_sparsity": 0.7172723989933117, "compression_loss": 90.97504425048828, "distillation_loss": 6.993381500244141, "epoch": 1.47, "learning_rate": 3.953024996981041e-05, "loss": 96.6543, "step": 1734, "task_loss": 3.5784246921539307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8471885055925472, "compression/movement_sparsity/importance_threshold": -0.0007003154705479521, "compression/movement_sparsity/linear_layer_sparsity": 0.7436095523839178, "compression/movement_sparsity/model_sparsity": 0.7180642766489739, "compression_loss": 91.05244445800781, "distillation_loss": 4.890755653381348, "epoch": 1.47, "learning_rate": 3.9524212051684585e-05, "loss": 95.6298, "step": 1735, "task_loss": 1.8569822311401367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8479115871715397, "compression/movement_sparsity/importance_threshold": -0.0006970016804551302, "compression/movement_sparsity/linear_layer_sparsity": 0.7444469509044815, "compression/movement_sparsity/model_sparsity": 0.7188729079542171, "compression_loss": 91.12957763671875, "distillation_loss": 3.2898364067077637, "epoch": 1.47, "learning_rate": 3.951817413355875e-05, "loss": 95.4651, "step": 1736, "task_loss": 2.467693328857422 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8486323841344925, "compression/movement_sparsity/importance_threshold": -0.0006936983604644634, "compression/movement_sparsity/linear_layer_sparsity": 0.7451821473842383, "compression/movement_sparsity/model_sparsity": 0.7195828481731653, "compression_loss": 91.20645904541016, "distillation_loss": 5.927244186401367, "epoch": 1.47, "learning_rate": 3.951213621543292e-05, "loss": 96.0579, "step": 1737, "task_loss": 2.4434263706207275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8493509000963049, "compression/movement_sparsity/importance_threshold": -0.000690405494009332, "compression/movement_sparsity/linear_layer_sparsity": 0.746008575670577, "compression/movement_sparsity/model_sparsity": 0.7203808861054776, "compression_loss": 91.28307342529297, "distillation_loss": 3.684591054916382, "epoch": 1.47, "learning_rate": 3.950609829730709e-05, "loss": 95.8426, "step": 1738, "task_loss": 3.1254310607910156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.850067138671875, "compression/movement_sparsity/importance_threshold": -0.0006871230645231208, "compression/movement_sparsity/linear_layer_sparsity": 0.7469053088492968, "compression/movement_sparsity/model_sparsity": 0.7212468137408344, "compression_loss": 91.35946655273438, "distillation_loss": 4.4885783195495605, "epoch": 1.47, "learning_rate": 3.950006037918126e-05, "loss": 95.9735, "step": 1739, "task_loss": 2.2474141120910645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.850781103476102, "compression/movement_sparsity/importance_threshold": -0.0006838510554392115, "compression/movement_sparsity/linear_layer_sparsity": 0.7477386173803615, "compression/movement_sparsity/model_sparsity": 0.7220514955603001, "compression_loss": 91.43559265136719, "distillation_loss": 6.071319580078125, "epoch": 1.47, "learning_rate": 3.949402246105543e-05, "loss": 96.5924, "step": 1740, "task_loss": 2.945770740509033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8514927981238849, "compression/movement_sparsity/importance_threshold": -0.0006805894501909862, "compression/movement_sparsity/linear_layer_sparsity": 0.7486116572379888, "compression/movement_sparsity/model_sparsity": 0.7228945438130332, "compression_loss": 91.51155090332031, "distillation_loss": 4.894759178161621, "epoch": 1.47, "learning_rate": 3.94879845429296e-05, "loss": 97.0103, "step": 1741, "task_loss": 3.389758825302124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8522022262301221, "compression/movement_sparsity/importance_threshold": -0.0006773382322118287, "compression/movement_sparsity/linear_layer_sparsity": 0.7494972413199691, "compression/movement_sparsity/model_sparsity": 0.7237497053574222, "compression_loss": 91.5871810913086, "distillation_loss": 4.286210060119629, "epoch": 1.47, "learning_rate": 3.948194662480377e-05, "loss": 96.1774, "step": 1742, "task_loss": 3.875507354736328 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8529093914097126, "compression/movement_sparsity/importance_threshold": -0.0006740973849351205, "compression/movement_sparsity/linear_layer_sparsity": 0.7503424859428373, "compression/movement_sparsity/model_sparsity": 0.7245659132272182, "compression_loss": 91.6625747680664, "distillation_loss": 6.482807159423828, "epoch": 1.47, "learning_rate": 3.9475908706677936e-05, "loss": 96.7181, "step": 1743, "task_loss": 2.658918857574463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8536142972775551, "compression/movement_sparsity/importance_threshold": -0.0006708668917942461, "compression/movement_sparsity/linear_layer_sparsity": 0.7509568071352692, "compression/movement_sparsity/model_sparsity": 0.7251591305968171, "compression_loss": 91.73773956298828, "distillation_loss": 7.259367942810059, "epoch": 1.47, "learning_rate": 3.946987078855211e-05, "loss": 96.7604, "step": 1744, "task_loss": 3.555168390274048 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8543169474485485, "compression/movement_sparsity/importance_threshold": -0.0006676467362225855, "compression/movement_sparsity/linear_layer_sparsity": 0.7516096076166623, "compression/movement_sparsity/model_sparsity": 0.7257895053734251, "compression_loss": 91.8126449584961, "distillation_loss": 5.295628547668457, "epoch": 1.47, "learning_rate": 3.9463832870426284e-05, "loss": 96.4212, "step": 1745, "task_loss": 2.1933910846710205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8550173455375916, "compression/movement_sparsity/importance_threshold": -0.0006644369016535236, "compression/movement_sparsity/linear_layer_sparsity": 0.7523509211944162, "compression/movement_sparsity/model_sparsity": 0.7265053525492359, "compression_loss": 91.8873062133789, "distillation_loss": 5.798072814941406, "epoch": 1.48, "learning_rate": 3.945779495230045e-05, "loss": 97.3556, "step": 1746, "task_loss": 3.4246582984924316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8557154951595831, "compression/movement_sparsity/importance_threshold": -0.0006612373715204423, "compression/movement_sparsity/linear_layer_sparsity": 0.7532759384987683, "compression/movement_sparsity/model_sparsity": 0.7273985926634974, "compression_loss": 91.96172332763672, "distillation_loss": 5.721127033233643, "epoch": 1.48, "learning_rate": 3.945175703417462e-05, "loss": 97.2576, "step": 1747, "task_loss": 2.687535285949707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8564113999294217, "compression/movement_sparsity/importance_threshold": -0.0006580481292567247, "compression/movement_sparsity/linear_layer_sparsity": 0.7539618762420213, "compression/movement_sparsity/model_sparsity": 0.7280609663350783, "compression_loss": 92.03585815429688, "distillation_loss": 4.806466579437256, "epoch": 1.48, "learning_rate": 3.944571911604879e-05, "loss": 96.5137, "step": 1748, "task_loss": 2.144590377807617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8571050634620065, "compression/movement_sparsity/importance_threshold": -0.0006548691582957521, "compression/movement_sparsity/linear_layer_sparsity": 0.7547593884218431, "compression/movement_sparsity/model_sparsity": 0.7288310815180888, "compression_loss": 92.10978698730469, "distillation_loss": 3.827007293701172, "epoch": 1.48, "learning_rate": 3.943968119792296e-05, "loss": 96.5679, "step": 1749, "task_loss": 2.1379213333129883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8577964893722361, "compression/movement_sparsity/importance_threshold": -0.0006517004420709084, "compression/movement_sparsity/linear_layer_sparsity": 0.7556144227863403, "compression/movement_sparsity/model_sparsity": 0.7296567428217722, "compression_loss": 92.18351745605469, "distillation_loss": 4.799386978149414, "epoch": 1.48, "learning_rate": 3.9433643279797126e-05, "loss": 97.4941, "step": 1750, "task_loss": 2.7740321159362793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8584856812750092, "compression/movement_sparsity/importance_threshold": -0.0006485419640155764, "compression/movement_sparsity/linear_layer_sparsity": 0.7564820967685315, "compression/movement_sparsity/model_sparsity": 0.7304946095333977, "compression_loss": 92.2569351196289, "distillation_loss": 5.660672187805176, "epoch": 1.48, "learning_rate": 3.94276053616713e-05, "loss": 96.4111, "step": 1751, "task_loss": 2.7557992935180664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.859172642785225, "compression/movement_sparsity/importance_threshold": -0.0006453937075631376, "compression/movement_sparsity/linear_layer_sparsity": 0.7570842791583102, "compression/movement_sparsity/model_sparsity": 0.7310761051055579, "compression_loss": 92.33015441894531, "distillation_loss": 4.347682476043701, "epoch": 1.48, "learning_rate": 3.942156744354547e-05, "loss": 96.9688, "step": 1752, "task_loss": 1.4376165866851807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8598573775177818, "compression/movement_sparsity/importance_threshold": -0.0006422556561469766, "compression/movement_sparsity/linear_layer_sparsity": 0.7578741956433479, "compression/movement_sparsity/model_sparsity": 0.7318388855292672, "compression_loss": 92.40312194824219, "distillation_loss": 3.484483242034912, "epoch": 1.48, "learning_rate": 3.9415529525419635e-05, "loss": 97.0369, "step": 1753, "task_loss": 1.745424747467041 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8605398890875786, "compression/movement_sparsity/importance_threshold": -0.0006391277932004747, "compression/movement_sparsity/linear_layer_sparsity": 0.7587540680490307, "compression/movement_sparsity/model_sparsity": 0.7326885316110106, "compression_loss": 92.47583770751953, "distillation_loss": 4.7671895027160645, "epoch": 1.48, "learning_rate": 3.940949160729381e-05, "loss": 96.9216, "step": 1754, "task_loss": 2.581732749938965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8612201811095144, "compression/movement_sparsity/importance_threshold": -0.000636010102157014, "compression/movement_sparsity/linear_layer_sparsity": 0.7593654081995537, "compression/movement_sparsity/model_sparsity": 0.7332788703466608, "compression_loss": 92.54832458496094, "distillation_loss": 5.140905380249023, "epoch": 1.48, "learning_rate": 3.940345368916798e-05, "loss": 97.1147, "step": 1755, "task_loss": 2.951785087585449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8618982571984878, "compression/movement_sparsity/importance_threshold": -0.0006329025664499775, "compression/movement_sparsity/linear_layer_sparsity": 0.760145940364662, "compression/movement_sparsity/model_sparsity": 0.7340325888306999, "compression_loss": 92.62055206298828, "distillation_loss": 3.728645086288452, "epoch": 1.48, "learning_rate": 3.939741577104214e-05, "loss": 96.6608, "step": 1756, "task_loss": 2.504098653793335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8625741209693975, "compression/movement_sparsity/importance_threshold": -0.0006298051695127491, "compression/movement_sparsity/linear_layer_sparsity": 0.7609613387959376, "compression/movement_sparsity/model_sparsity": 0.7348199758174022, "compression_loss": 92.69254302978516, "distillation_loss": 5.459101676940918, "epoch": 1.48, "learning_rate": 3.939137785291632e-05, "loss": 97.1889, "step": 1757, "task_loss": 2.4609711170196533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8632477760371424, "compression/movement_sparsity/importance_threshold": -0.0006267178947787109, "compression/movement_sparsity/linear_layer_sparsity": 0.7618063568596207, "compression/movement_sparsity/model_sparsity": 0.7356359649110181, "compression_loss": 92.76434326171875, "distillation_loss": 3.896557331085205, "epoch": 1.49, "learning_rate": 3.938533993479049e-05, "loss": 97.0028, "step": 1758, "task_loss": 2.3121657371520996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8639192260166213, "compression/movement_sparsity/importance_threshold": -0.0006236407256812451, "compression/movement_sparsity/linear_layer_sparsity": 0.762730217519712, "compression/movement_sparsity/model_sparsity": 0.7365280881153076, "compression_loss": 92.83592224121094, "distillation_loss": 5.186717987060547, "epoch": 1.49, "learning_rate": 3.937930201666465e-05, "loss": 97.3133, "step": 1759, "task_loss": 2.339453935623169 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8645884745227331, "compression/movement_sparsity/importance_threshold": -0.0006205736456537337, "compression/movement_sparsity/linear_layer_sparsity": 0.7635067073919919, "compression/movement_sparsity/model_sparsity": 0.7372779031717123, "compression_loss": 92.9073715209961, "distillation_loss": 4.970376968383789, "epoch": 1.49, "learning_rate": 3.9373264098538825e-05, "loss": 98.2178, "step": 1760, "task_loss": 2.474046468734741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8652555251703765, "compression/movement_sparsity/importance_threshold": -0.0006175166381295607, "compression/movement_sparsity/linear_layer_sparsity": 0.7642459103920742, "compression/movement_sparsity/model_sparsity": 0.7379917122746874, "compression_loss": 92.97843170166016, "distillation_loss": 5.091136455535889, "epoch": 1.49, "learning_rate": 3.9367226180413e-05, "loss": 98.3346, "step": 1761, "task_loss": 2.809150218963623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8659203815744502, "compression/movement_sparsity/importance_threshold": -0.000614469686542109, "compression/movement_sparsity/linear_layer_sparsity": 0.7651127735308663, "compression/movement_sparsity/model_sparsity": 0.738828795997879, "compression_loss": 93.04933166503906, "distillation_loss": 4.536993026733398, "epoch": 1.49, "learning_rate": 3.9361188262287166e-05, "loss": 98.5893, "step": 1762, "task_loss": 2.4258875846862793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8665830473498531, "compression/movement_sparsity/importance_threshold": -0.00061143277432476, "compression/movement_sparsity/linear_layer_sparsity": 0.7656989536876776, "compression/movement_sparsity/model_sparsity": 0.7393948390630029, "compression_loss": 93.1199722290039, "distillation_loss": 6.29010534286499, "epoch": 1.49, "learning_rate": 3.9355150344161334e-05, "loss": 97.8892, "step": 1763, "task_loss": 2.9006991386413574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.867243526111484, "compression/movement_sparsity/importance_threshold": -0.0006084058849108974, "compression/movement_sparsity/linear_layer_sparsity": 0.7663638214267181, "compression/movement_sparsity/model_sparsity": 0.7400368665498348, "compression_loss": 93.19038391113281, "distillation_loss": 6.476518630981445, "epoch": 1.49, "learning_rate": 3.934911242603551e-05, "loss": 98.2353, "step": 1764, "task_loss": 2.3078627586364746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8679018214742417, "compression/movement_sparsity/importance_threshold": -0.0006053890017339034, "compression/movement_sparsity/linear_layer_sparsity": 0.7670148690554686, "compression/movement_sparsity/model_sparsity": 0.7406655486896809, "compression_loss": 93.26052856445312, "distillation_loss": 3.1923294067382812, "epoch": 1.49, "learning_rate": 3.9343074507909675e-05, "loss": 97.0714, "step": 1765, "task_loss": 1.7249267101287842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.868557937053025, "compression/movement_sparsity/importance_threshold": -0.0006023821082271601, "compression/movement_sparsity/linear_layer_sparsity": 0.7678289081316336, "compression/movement_sparsity/model_sparsity": 0.7414516230193027, "compression_loss": 93.33043670654297, "distillation_loss": 4.851028919219971, "epoch": 1.49, "learning_rate": 3.933703658978384e-05, "loss": 98.3393, "step": 1766, "task_loss": 1.9863370656967163 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8692118764627327, "compression/movement_sparsity/importance_threshold": -0.0005993851878240515, "compression/movement_sparsity/linear_layer_sparsity": 0.7685642477014021, "compression/movement_sparsity/model_sparsity": 0.7421617014126805, "compression_loss": 93.40010833740234, "distillation_loss": 3.741544008255005, "epoch": 1.49, "learning_rate": 3.9330998671658016e-05, "loss": 97.471, "step": 1767, "task_loss": 1.8945194482803345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8698636433182634, "compression/movement_sparsity/importance_threshold": -0.0005963982239579595, "compression/movement_sparsity/linear_layer_sparsity": 0.7693035818673285, "compression/movement_sparsity/model_sparsity": 0.7428756371755493, "compression_loss": 93.46953582763672, "distillation_loss": 6.349115371704102, "epoch": 1.49, "learning_rate": 3.932496075353218e-05, "loss": 98.8785, "step": 1768, "task_loss": 4.054642200469971 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8705132412345162, "compression/movement_sparsity/importance_threshold": -0.0005934212000622665, "compression/movement_sparsity/linear_layer_sparsity": 0.7700821703931121, "compression/movement_sparsity/model_sparsity": 0.7436274787902538, "compression_loss": 93.53874969482422, "distillation_loss": 6.062593936920166, "epoch": 1.5, "learning_rate": 3.931892283540635e-05, "loss": 98.9934, "step": 1769, "task_loss": 3.051285982131958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8711606738263898, "compression/movement_sparsity/importance_threshold": -0.0005904540995703552, "compression/movement_sparsity/linear_layer_sparsity": 0.7706652621905058, "compression/movement_sparsity/model_sparsity": 0.7441905395906069, "compression_loss": 93.60774993896484, "distillation_loss": 4.646556854248047, "epoch": 1.5, "learning_rate": 3.9312884917280524e-05, "loss": 99.1594, "step": 1770, "task_loss": 2.9270153045654297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8718059447087829, "compression/movement_sparsity/importance_threshold": -0.0005874969059156089, "compression/movement_sparsity/linear_layer_sparsity": 0.7713411478604418, "compression/movement_sparsity/model_sparsity": 0.7448432065085131, "compression_loss": 93.67647552490234, "distillation_loss": 3.7188363075256348, "epoch": 1.5, "learning_rate": 3.93068469991547e-05, "loss": 97.9669, "step": 1771, "task_loss": 1.502801775932312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8724490574965944, "compression/movement_sparsity/importance_threshold": -0.0005845496025314095, "compression/movement_sparsity/linear_layer_sparsity": 0.7721029948548648, "compression/movement_sparsity/model_sparsity": 0.7455788817149621, "compression_loss": 93.74494934082031, "distillation_loss": 4.805277347564697, "epoch": 1.5, "learning_rate": 3.930080908102886e-05, "loss": 98.2487, "step": 1772, "task_loss": 3.337299346923828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8730900158047231, "compression/movement_sparsity/importance_threshold": -0.0005816121728511401, "compression/movement_sparsity/linear_layer_sparsity": 0.7729033807590868, "compression/movement_sparsity/model_sparsity": 0.746351771901099, "compression_loss": 93.8132095336914, "distillation_loss": 4.188114166259766, "epoch": 1.5, "learning_rate": 3.929477116290303e-05, "loss": 98.2864, "step": 1773, "task_loss": 1.9522638320922852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8737288232480677, "compression/movement_sparsity/importance_threshold": -0.0005786846003081837, "compression/movement_sparsity/linear_layer_sparsity": 0.7737298090454255, "compression/movement_sparsity/model_sparsity": 0.7471498098334113, "compression_loss": 93.88125610351562, "distillation_loss": 3.9206347465515137, "epoch": 1.5, "learning_rate": 3.9288733244777206e-05, "loss": 98.835, "step": 1774, "task_loss": 3.552976369857788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8743654834415271, "compression/movement_sparsity/importance_threshold": -0.0005757668683359224, "compression/movement_sparsity/linear_layer_sparsity": 0.7744417176257895, "compression/movement_sparsity/model_sparsity": 0.7478372621639527, "compression_loss": 93.94900512695312, "distillation_loss": 5.78657341003418, "epoch": 1.5, "learning_rate": 3.928269532665137e-05, "loss": 98.6962, "step": 1775, "task_loss": 2.7942609786987305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.875, "compression/movement_sparsity/importance_threshold": -0.0005728589603677392, "compression/movement_sparsity/linear_layer_sparsity": 0.7752260178278707, "compression/movement_sparsity/model_sparsity": 0.7485946192413029, "compression_loss": 94.01655578613281, "distillation_loss": 5.933381080627441, "epoch": 1.5, "learning_rate": 3.927665740852554e-05, "loss": 99.6084, "step": 1776, "task_loss": 2.5799753665924072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8756323765383853, "compression/movement_sparsity/importance_threshold": -0.0005699608598370163, "compression/movement_sparsity/linear_layer_sparsity": 0.7759799233266481, "compression/movement_sparsity/model_sparsity": 0.7493226257669127, "compression_loss": 94.08393096923828, "distillation_loss": 7.001482963562012, "epoch": 1.5, "learning_rate": 3.9270619490399715e-05, "loss": 99.2743, "step": 1777, "task_loss": 3.0158238410949707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8762626166715816, "compression/movement_sparsity/importance_threshold": -0.0005670725501771374, "compression/movement_sparsity/linear_layer_sparsity": 0.7765288046870946, "compression/movement_sparsity/model_sparsity": 0.7498526513640712, "compression_loss": 94.15103149414062, "distillation_loss": 6.338566303253174, "epoch": 1.5, "learning_rate": 3.926458157227388e-05, "loss": 99.3622, "step": 1778, "task_loss": 3.2738637924194336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.876890724014488, "compression/movement_sparsity/importance_threshold": -0.000564194014821484, "compression/movement_sparsity/linear_layer_sparsity": 0.7771747726204322, "compression/movement_sparsity/model_sparsity": 0.7504764283116688, "compression_loss": 94.21788024902344, "distillation_loss": 4.0920634269714355, "epoch": 1.5, "learning_rate": 3.925854365414805e-05, "loss": 98.9875, "step": 1779, "task_loss": 1.8505133390426636 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8775167021820031, "compression/movement_sparsity/importance_threshold": -0.0005613252372034389, "compression/movement_sparsity/linear_layer_sparsity": 0.7777758460626207, "compression/movement_sparsity/model_sparsity": 0.7510568530320001, "compression_loss": 94.28461456298828, "distillation_loss": 4.469675064086914, "epoch": 1.5, "learning_rate": 3.925250573602222e-05, "loss": 99.2119, "step": 1780, "task_loss": 3.207858085632324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8781405547890256, "compression/movement_sparsity/importance_threshold": -0.000558466200756386, "compression/movement_sparsity/linear_layer_sparsity": 0.7783357215056275, "compression/movement_sparsity/model_sparsity": 0.7515974950311611, "compression_loss": 94.35110473632812, "distillation_loss": 5.630843162536621, "epoch": 1.51, "learning_rate": 3.924646781789639e-05, "loss": 99.4934, "step": 1781, "task_loss": 2.8934242725372314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8787622854504545, "compression/movement_sparsity/importance_threshold": -0.0005556168889137066, "compression/movement_sparsity/linear_layer_sparsity": 0.7788741334468896, "compression/movement_sparsity/model_sparsity": 0.752117410865892, "compression_loss": 94.41726684570312, "distillation_loss": 6.1225104331970215, "epoch": 1.51, "learning_rate": 3.924042989977056e-05, "loss": 100.4602, "step": 1782, "task_loss": 3.2528676986694336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8793818977811886, "compression/movement_sparsity/importance_threshold": -0.0005527772851087838, "compression/movement_sparsity/linear_layer_sparsity": 0.7794818367262837, "compression/movement_sparsity/model_sparsity": 0.752704237668125, "compression_loss": 94.48322296142578, "distillation_loss": 5.375153541564941, "epoch": 1.51, "learning_rate": 3.923439198164473e-05, "loss": 99.892, "step": 1783, "task_loss": 2.4736216068267822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8799993953961266, "compression/movement_sparsity/importance_threshold": -0.0005499473727750004, "compression/movement_sparsity/linear_layer_sparsity": 0.7802828426872228, "compression/movement_sparsity/model_sparsity": 0.7534777266101234, "compression_loss": 94.54907989501953, "distillation_loss": 5.554371356964111, "epoch": 1.51, "learning_rate": 3.92283540635189e-05, "loss": 99.8289, "step": 1784, "task_loss": 2.4489150047302246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8806147819101673, "compression/movement_sparsity/importance_threshold": -0.0005471271353457386, "compression/movement_sparsity/linear_layer_sparsity": 0.7808717892509257, "compression/movement_sparsity/model_sparsity": 0.7540464410475515, "compression_loss": 94.614501953125, "distillation_loss": 4.731574058532715, "epoch": 1.51, "learning_rate": 3.9222316145393066e-05, "loss": 99.549, "step": 1785, "task_loss": 1.7897518873214722 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8812280609382095, "compression/movement_sparsity/importance_threshold": -0.0005443165562543824, "compression/movement_sparsity/linear_layer_sparsity": 0.7814055269184745, "compression/movement_sparsity/model_sparsity": 0.7545618431842509, "compression_loss": 94.67981719970703, "distillation_loss": 4.7696638107299805, "epoch": 1.51, "learning_rate": 3.921627822726724e-05, "loss": 99.3332, "step": 1786, "task_loss": 3.7450003623962402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.881839236095152, "compression/movement_sparsity/importance_threshold": -0.0005415156189343129, "compression/movement_sparsity/linear_layer_sparsity": 0.7821087189322967, "compression/movement_sparsity/model_sparsity": 0.7552408783891266, "compression_loss": 94.74486541748047, "distillation_loss": 3.7556991577148438, "epoch": 1.51, "learning_rate": 3.9210240309141414e-05, "loss": 99.9517, "step": 1787, "task_loss": 1.805461049079895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8824483109958936, "compression/movement_sparsity/importance_threshold": -0.0005387243068189131, "compression/movement_sparsity/linear_layer_sparsity": 0.7826895689861748, "compression/movement_sparsity/model_sparsity": 0.7558017744567502, "compression_loss": 94.80976867675781, "distillation_loss": 4.164492607116699, "epoch": 1.51, "learning_rate": 3.920420239101558e-05, "loss": 99.3374, "step": 1788, "task_loss": 3.595012903213501 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8830552892553332, "compression/movement_sparsity/importance_threshold": -0.000535942603341567, "compression/movement_sparsity/linear_layer_sparsity": 0.7834565552968488, "compression/movement_sparsity/model_sparsity": 0.7565424124281267, "compression_loss": 94.87434387207031, "distillation_loss": 5.839162826538086, "epoch": 1.51, "learning_rate": 3.919816447288975e-05, "loss": 100.3784, "step": 1789, "task_loss": 2.7945501804351807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8836601744883694, "compression/movement_sparsity/importance_threshold": -0.0005331704919356549, "compression/movement_sparsity/linear_layer_sparsity": 0.7842255806401885, "compression/movement_sparsity/model_sparsity": 0.757285019385124, "compression_loss": 94.93871307373047, "distillation_loss": 6.703171730041504, "epoch": 1.51, "learning_rate": 3.919212655476392e-05, "loss": 99.8694, "step": 1790, "task_loss": 2.766188144683838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8842629703099012, "compression/movement_sparsity/importance_threshold": -0.0005304079560345615, "compression/movement_sparsity/linear_layer_sparsity": 0.7848506812801633, "compression/movement_sparsity/model_sparsity": 0.7578886458950812, "compression_loss": 95.00287628173828, "distillation_loss": 4.843336582183838, "epoch": 1.51, "learning_rate": 3.918608863663809e-05, "loss": 99.6026, "step": 1791, "task_loss": 2.414226770401001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8848636803348271, "compression/movement_sparsity/importance_threshold": -0.000527654979071669, "compression/movement_sparsity/linear_layer_sparsity": 0.7854117610641013, "compression/movement_sparsity/model_sparsity": 0.7584304508623576, "compression_loss": 95.06681060791016, "distillation_loss": 5.003528118133545, "epoch": 1.51, "learning_rate": 3.9180050718512256e-05, "loss": 99.2725, "step": 1792, "task_loss": 3.0047993659973145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8854623081780463, "compression/movement_sparsity/importance_threshold": -0.0005249115444803595, "compression/movement_sparsity/linear_layer_sparsity": 0.786001399229527, "compression/movement_sparsity/model_sparsity": 0.7589998331428618, "compression_loss": 95.13052368164062, "distillation_loss": 4.433448791503906, "epoch": 1.52, "learning_rate": 3.917401280038643e-05, "loss": 100.1023, "step": 1793, "task_loss": 1.5658273696899414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8860588574544573, "compression/movement_sparsity/importance_threshold": -0.0005221776356940152, "compression/movement_sparsity/linear_layer_sparsity": 0.7865681906897626, "compression/movement_sparsity/model_sparsity": 0.7595471535727837, "compression_loss": 95.1939697265625, "distillation_loss": 3.340245008468628, "epoch": 1.52, "learning_rate": 3.91679748822606e-05, "loss": 99.956, "step": 1794, "task_loss": 1.7454278469085693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.886653331778959, "compression/movement_sparsity/importance_threshold": -0.0005194532361460207, "compression/movement_sparsity/linear_layer_sparsity": 0.7872127515713191, "compression/movement_sparsity/model_sparsity": 0.7601695718051577, "compression_loss": 95.25724792480469, "distillation_loss": 5.4439873695373535, "epoch": 1.52, "learning_rate": 3.9161936964134765e-05, "loss": 101.2555, "step": 1795, "task_loss": 3.4998648166656494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8872457347664502, "compression/movement_sparsity/importance_threshold": -0.0005167383292697574, "compression/movement_sparsity/linear_layer_sparsity": 0.78784891783886, "compression/movement_sparsity/model_sparsity": 0.7607838838043322, "compression_loss": 95.32023620605469, "distillation_loss": 4.835203647613525, "epoch": 1.52, "learning_rate": 3.915589904600894e-05, "loss": 100.8801, "step": 1796, "task_loss": 2.917163848876953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8878360700318296, "compression/movement_sparsity/importance_threshold": -0.0005140328984986082, "compression/movement_sparsity/linear_layer_sparsity": 0.7884935025687518, "compression/movement_sparsity/model_sparsity": 0.7614063250657777, "compression_loss": 95.38301086425781, "distillation_loss": 4.4107561111450195, "epoch": 1.52, "learning_rate": 3.9149861127883106e-05, "loss": 100.5832, "step": 1797, "task_loss": 1.9992985725402832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8884243411899961, "compression/movement_sparsity/importance_threshold": -0.0005113369272659552, "compression/movement_sparsity/linear_layer_sparsity": 0.7889913484917168, "compression/movement_sparsity/model_sparsity": 0.7618870684497355, "compression_loss": 95.44564056396484, "distillation_loss": 4.381132125854492, "epoch": 1.52, "learning_rate": 3.914382320975728e-05, "loss": 100.8314, "step": 1798, "task_loss": 3.0137410163879395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8890105518558485, "compression/movement_sparsity/importance_threshold": -0.0005086503990051816, "compression/movement_sparsity/linear_layer_sparsity": 0.7896002799603774, "compression/movement_sparsity/model_sparsity": 0.7624750812491553, "compression_loss": 95.5080337524414, "distillation_loss": 5.229893684387207, "epoch": 1.52, "learning_rate": 3.913778529163145e-05, "loss": 99.6126, "step": 1799, "task_loss": 3.0018956661224365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8895947056442857, "compression/movement_sparsity/importance_threshold": -0.0005059732971496702, "compression/movement_sparsity/linear_layer_sparsity": 0.7902220060609113, "compression/movement_sparsity/model_sparsity": 0.7630754491454826, "compression_loss": 95.57022094726562, "distillation_loss": 7.246957302093506, "epoch": 1.52, "learning_rate": 3.9131747373505614e-05, "loss": 100.9142, "step": 1800, "task_loss": 3.134300470352173 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8901768061702062, "compression/movement_sparsity/importance_threshold": -0.0005033056051328031, "compression/movement_sparsity/linear_layer_sparsity": 0.7908226025363945, "compression/movement_sparsity/model_sparsity": 0.7636554132843821, "compression_loss": 95.63214874267578, "distillation_loss": 5.951671123504639, "epoch": 1.52, "learning_rate": 3.912570945537979e-05, "loss": 100.8358, "step": 1801, "task_loss": 3.1455202102661133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8907568570485089, "compression/movement_sparsity/importance_threshold": -0.0005006473063879635, "compression/movement_sparsity/linear_layer_sparsity": 0.7914397855290591, "compression/movement_sparsity/model_sparsity": 0.7642513941425718, "compression_loss": 95.69386291503906, "distillation_loss": 4.8524489402771, "epoch": 1.52, "learning_rate": 3.9119671537253955e-05, "loss": 101.027, "step": 1802, "task_loss": 3.020069122314453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8913348618940928, "compression/movement_sparsity/importance_threshold": -0.0004979983843485343, "compression/movement_sparsity/linear_layer_sparsity": 0.792043303425613, "compression/movement_sparsity/model_sparsity": 0.7648341793427409, "compression_loss": 95.75533294677734, "distillation_loss": 3.867804765701294, "epoch": 1.52, "learning_rate": 3.911363361912813e-05, "loss": 100.9363, "step": 1803, "task_loss": 2.6909990310668945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8919108243218565, "compression/movement_sparsity/importance_threshold": -0.0004953588224478975, "compression/movement_sparsity/linear_layer_sparsity": 0.7926383909356484, "compression/movement_sparsity/model_sparsity": 0.7654088237661032, "compression_loss": 95.81658935546875, "distillation_loss": 6.950314044952393, "epoch": 1.52, "learning_rate": 3.9107595701002296e-05, "loss": 101.5201, "step": 1804, "task_loss": 3.1630828380584717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8924847479466989, "compression/movement_sparsity/importance_threshold": -0.0004927286041194363, "compression/movement_sparsity/linear_layer_sparsity": 0.7931331961958662, "compression/movement_sparsity/model_sparsity": 0.7658866309434335, "compression_loss": 95.87770080566406, "distillation_loss": 7.5065226554870605, "epoch": 1.53, "learning_rate": 3.9101557782876464e-05, "loss": 101.7347, "step": 1805, "task_loss": 5.303219318389893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8930566363835187, "compression/movement_sparsity/importance_threshold": -0.0004901077127965328, "compression/movement_sparsity/linear_layer_sparsity": 0.7935757616776712, "compression/movement_sparsity/model_sparsity": 0.7663139929394479, "compression_loss": 95.93848419189453, "distillation_loss": 3.969412326812744, "epoch": 1.53, "learning_rate": 3.909551986475064e-05, "loss": 100.4651, "step": 1806, "task_loss": 2.3589251041412354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8936264932472149, "compression/movement_sparsity/importance_threshold": -0.000487496131912569, "compression/movement_sparsity/linear_layer_sparsity": 0.7942103420309166, "compression/movement_sparsity/model_sparsity": 0.7669267735053618, "compression_loss": 95.99911499023438, "distillation_loss": 6.094596862792969, "epoch": 1.53, "learning_rate": 3.9089481946624805e-05, "loss": 101.1781, "step": 1807, "task_loss": 3.622671127319336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8941943221526859, "compression/movement_sparsity/importance_threshold": -0.0004848938449009306, "compression/movement_sparsity/linear_layer_sparsity": 0.7948530665906572, "compression/movement_sparsity/model_sparsity": 0.7675474184992233, "compression_loss": 96.05949401855469, "distillation_loss": 6.205593109130859, "epoch": 1.53, "learning_rate": 3.908344402849898e-05, "loss": 101.0317, "step": 1808, "task_loss": 3.898811101913452 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8947601267148309, "compression/movement_sparsity/importance_threshold": -0.0004823008351949971, "compression/movement_sparsity/linear_layer_sparsity": 0.7953382490475929, "compression/movement_sparsity/model_sparsity": 0.7680159334461674, "compression_loss": 96.11962890625, "distillation_loss": 5.550574779510498, "epoch": 1.53, "learning_rate": 3.9077406110373146e-05, "loss": 101.0755, "step": 1809, "task_loss": 4.310842037200928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8953239105485484, "compression/movement_sparsity/importance_threshold": -0.0004797170862281524, "compression/movement_sparsity/linear_layer_sparsity": 0.7960092696571335, "compression/movement_sparsity/model_sparsity": 0.7686639024334694, "compression_loss": 96.17952728271484, "distillation_loss": 4.930641174316406, "epoch": 1.53, "learning_rate": 3.907136819224731e-05, "loss": 100.9146, "step": 1810, "task_loss": 2.3638668060302734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8958856772687375, "compression/movement_sparsity/importance_threshold": -0.00047714258143377857, "compression/movement_sparsity/linear_layer_sparsity": 0.7965548480231447, "compression/movement_sparsity/model_sparsity": 0.7691907385042127, "compression_loss": 96.2392807006836, "distillation_loss": 4.882296562194824, "epoch": 1.53, "learning_rate": 3.906533027412149e-05, "loss": 101.7939, "step": 1811, "task_loss": 3.2334132194519043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8964454304902967, "compression/movement_sparsity/importance_threshold": -0.0004745773042452595, "compression/movement_sparsity/linear_layer_sparsity": 0.797093319585245, "compression/movement_sparsity/model_sparsity": 0.7697107119116227, "compression_loss": 96.2987289428711, "distillation_loss": 4.666656494140625, "epoch": 1.53, "learning_rate": 3.9059292355995654e-05, "loss": 101.0247, "step": 1812, "task_loss": 3.0006890296936035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.897003173828125, "compression/movement_sparsity/importance_threshold": -0.0004720212380959765, "compression/movement_sparsity/linear_layer_sparsity": 0.7976573923352596, "compression/movement_sparsity/model_sparsity": 0.7702554070273834, "compression_loss": 96.35802459716797, "distillation_loss": 6.677635669708252, "epoch": 1.53, "learning_rate": 3.905325443786982e-05, "loss": 101.2986, "step": 1813, "task_loss": 2.4046804904937744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8975589108971211, "compression/movement_sparsity/importance_threshold": -0.00046947436641931253, "compression/movement_sparsity/linear_layer_sparsity": 0.7982528256461563, "compression/movement_sparsity/model_sparsity": 0.7708303853722838, "compression_loss": 96.41710662841797, "distillation_loss": 5.800105571746826, "epoch": 1.53, "learning_rate": 3.9047216519743995e-05, "loss": 101.219, "step": 1814, "task_loss": 3.045485258102417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8981126453121838, "compression/movement_sparsity/importance_threshold": -0.00046693667264865145, "compression/movement_sparsity/linear_layer_sparsity": 0.7988458979718612, "compression/movement_sparsity/model_sparsity": 0.7714030838390968, "compression_loss": 96.47602844238281, "distillation_loss": 5.970515727996826, "epoch": 1.53, "learning_rate": 3.904117860161816e-05, "loss": 101.8202, "step": 1815, "task_loss": 3.7559406757354736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.898664380688212, "compression/movement_sparsity/importance_threshold": -0.0004644081402173754, "compression/movement_sparsity/linear_layer_sparsity": 0.7993842979889557, "compression/movement_sparsity/model_sparsity": 0.771922988159292, "compression_loss": 96.53461456298828, "distillation_loss": 5.30567741394043, "epoch": 1.53, "learning_rate": 3.903514068349233e-05, "loss": 101.522, "step": 1816, "task_loss": 3.0806734561920166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8992141206401043, "compression/movement_sparsity/importance_threshold": -0.0004618887525588656, "compression/movement_sparsity/linear_layer_sparsity": 0.7999535458277242, "compression/movement_sparsity/model_sparsity": 0.7724726805835875, "compression_loss": 96.59308624267578, "distillation_loss": 4.874414443969727, "epoch": 1.54, "learning_rate": 3.9029102765366504e-05, "loss": 101.56, "step": 1817, "task_loss": 2.2424914836883545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8997618687827597, "compression/movement_sparsity/importance_threshold": -0.0004593784931065069, "compression/movement_sparsity/linear_layer_sparsity": 0.8004856141118041, "compression/movement_sparsity/model_sparsity": 0.7729864706852757, "compression_loss": 96.6512451171875, "distillation_loss": 6.177508354187012, "epoch": 1.54, "learning_rate": 3.902306484724068e-05, "loss": 101.7776, "step": 1818, "task_loss": 2.367758274078369 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9003076287310768, "compression/movement_sparsity/importance_threshold": -0.00045687734529367956, "compression/movement_sparsity/linear_layer_sparsity": 0.8011592461140569, "compression/movement_sparsity/model_sparsity": 0.7736369613559168, "compression_loss": 96.70924377441406, "distillation_loss": 4.558121204376221, "epoch": 1.54, "learning_rate": 3.9017026929114845e-05, "loss": 102.4566, "step": 1819, "task_loss": 1.7423453330993652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9008514040999546, "compression/movement_sparsity/importance_threshold": -0.00045438529255376926, "compression/movement_sparsity/linear_layer_sparsity": 0.8017764648792244, "compression/movement_sparsity/model_sparsity": 0.7742329767577137, "compression_loss": 96.76702880859375, "distillation_loss": 5.412147521972656, "epoch": 1.54, "learning_rate": 3.901098901098901e-05, "loss": 101.7015, "step": 1820, "task_loss": 2.934189558029175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9013931985042918, "compression/movement_sparsity/importance_threshold": -0.0004519023183201555, "compression/movement_sparsity/linear_layer_sparsity": 0.8022504147702471, "compression/movement_sparsity/model_sparsity": 0.7746906450119394, "compression_loss": 96.82456970214844, "distillation_loss": 5.886987686157227, "epoch": 1.54, "learning_rate": 3.9004951092863186e-05, "loss": 102.1558, "step": 1821, "task_loss": 2.6318373680114746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9019330155589871, "compression/movement_sparsity/importance_threshold": -0.00044942840602622304, "compression/movement_sparsity/linear_layer_sparsity": 0.8027608049175649, "compression/movement_sparsity/model_sparsity": 0.7751835016875531, "compression_loss": 96.88190460205078, "distillation_loss": 5.618556976318359, "epoch": 1.54, "learning_rate": 3.899891317473735e-05, "loss": 101.5452, "step": 1822, "task_loss": 3.0735456943511963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9024708588789395, "compression/movement_sparsity/importance_threshold": -0.000446963539105354, "compression/movement_sparsity/linear_layer_sparsity": 0.8031703523791862, "compression/movement_sparsity/model_sparsity": 0.7755789799339524, "compression_loss": 96.93904113769531, "distillation_loss": 6.511363983154297, "epoch": 1.54, "learning_rate": 3.899287525661152e-05, "loss": 102.9877, "step": 1823, "task_loss": 2.3529675006866455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9030067320790477, "compression/movement_sparsity/importance_threshold": -0.00044450770099093047, "compression/movement_sparsity/linear_layer_sparsity": 0.803626058293726, "compression/movement_sparsity/model_sparsity": 0.7760190309484124, "compression_loss": 96.99597930908203, "distillation_loss": 6.5896477699279785, "epoch": 1.54, "learning_rate": 3.8986837338485694e-05, "loss": 102.3717, "step": 1824, "task_loss": 2.888782024383545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9035406387742103, "compression/movement_sparsity/importance_threshold": -0.00044206087511633636, "compression/movement_sparsity/linear_layer_sparsity": 0.8040962043752728, "compression/movement_sparsity/model_sparsity": 0.7764730260657196, "compression_loss": 97.05265045166016, "distillation_loss": 5.855340003967285, "epoch": 1.54, "learning_rate": 3.898079942035986e-05, "loss": 102.949, "step": 1825, "task_loss": 2.779860019683838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9040725825793265, "compression/movement_sparsity/importance_threshold": -0.00043962304491495293, "compression/movement_sparsity/linear_layer_sparsity": 0.8046168493067575, "compression/movement_sparsity/model_sparsity": 0.7769757852421165, "compression_loss": 97.10911560058594, "distillation_loss": 5.044782638549805, "epoch": 1.54, "learning_rate": 3.897476150223403e-05, "loss": 102.2264, "step": 1826, "task_loss": 2.2214515209198 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9046025671092948, "compression/movement_sparsity/importance_threshold": -0.0004371941938201649, "compression/movement_sparsity/linear_layer_sparsity": 0.8052114717742551, "compression/movement_sparsity/model_sparsity": 0.7775499805985829, "compression_loss": 97.16541290283203, "distillation_loss": 5.242118835449219, "epoch": 1.54, "learning_rate": 3.89687235841082e-05, "loss": 102.2095, "step": 1827, "task_loss": 2.3697235584259033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9051305959790141, "compression/movement_sparsity/importance_threshold": -0.0004347743052653518, "compression/movement_sparsity/linear_layer_sparsity": 0.8059177998441655, "compression/movement_sparsity/model_sparsity": 0.7782320441263726, "compression_loss": 97.2214584350586, "distillation_loss": 4.680717945098877, "epoch": 1.54, "learning_rate": 3.8962685665982377e-05, "loss": 102.3858, "step": 1828, "task_loss": 1.9834247827529907 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9056566728033831, "compression/movement_sparsity/importance_threshold": -0.00043236336268389925, "compression/movement_sparsity/linear_layer_sparsity": 0.8065394067030229, "compression/movement_sparsity/model_sparsity": 0.7788322968773419, "compression_loss": 97.27731323242188, "distillation_loss": 5.345104217529297, "epoch": 1.55, "learning_rate": 3.895664774785654e-05, "loss": 102.3378, "step": 1829, "task_loss": 2.1498639583587646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9061808011973007, "compression/movement_sparsity/importance_threshold": -0.0004299613495091885, "compression/movement_sparsity/linear_layer_sparsity": 0.8071089049493118, "compression/movement_sparsity/model_sparsity": 0.7793822311068892, "compression_loss": 97.33295440673828, "distillation_loss": 4.92858362197876, "epoch": 1.55, "learning_rate": 3.895060982973071e-05, "loss": 102.5556, "step": 1830, "task_loss": 2.9905648231506348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9067029847756658, "compression/movement_sparsity/importance_threshold": -0.00042756824917460166, "compression/movement_sparsity/linear_layer_sparsity": 0.8075549999847371, "compression/movement_sparsity/model_sparsity": 0.7798130014054988, "compression_loss": 97.38838195800781, "distillation_loss": 5.501850128173828, "epoch": 1.55, "learning_rate": 3.8944571911604885e-05, "loss": 103.2714, "step": 1831, "task_loss": 3.352752923965454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9072232271533769, "compression/movement_sparsity/importance_threshold": -0.0004251840451135235, "compression/movement_sparsity/linear_layer_sparsity": 0.8079880619049363, "compression/movement_sparsity/model_sparsity": 0.7802311863164849, "compression_loss": 97.44354248046875, "distillation_loss": 5.221872806549072, "epoch": 1.55, "learning_rate": 3.8938533993479045e-05, "loss": 103.153, "step": 1832, "task_loss": 2.9519875049591064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.907741531945333, "compression/movement_sparsity/importance_threshold": -0.00042280872075933437, "compression/movement_sparsity/linear_layer_sparsity": 0.8085470191870351, "compression/movement_sparsity/model_sparsity": 0.7807709416963898, "compression_loss": 97.49856567382812, "distillation_loss": 5.717840194702148, "epoch": 1.55, "learning_rate": 3.893249607535322e-05, "loss": 103.1366, "step": 1833, "task_loss": 2.571096658706665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9082579027664329, "compression/movement_sparsity/importance_threshold": -0.0004204422595454182, "compression/movement_sparsity/linear_layer_sparsity": 0.8091046528865262, "compression/movement_sparsity/model_sparsity": 0.7813094189628215, "compression_loss": 97.55327606201172, "distillation_loss": 5.424787998199463, "epoch": 1.55, "learning_rate": 3.892645815722739e-05, "loss": 102.7801, "step": 1834, "task_loss": 2.941086530685425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9087723432315753, "compression/movement_sparsity/importance_threshold": -0.0004180846449051579, "compression/movement_sparsity/linear_layer_sparsity": 0.8098009050347842, "compression/movement_sparsity/model_sparsity": 0.7819817527078647, "compression_loss": 97.60784149169922, "distillation_loss": 5.340180397033691, "epoch": 1.55, "learning_rate": 3.892042023910156e-05, "loss": 102.6289, "step": 1835, "task_loss": 2.8425111770629883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9092848569556591, "compression/movement_sparsity/importance_threshold": -0.00041573586027193483, "compression/movement_sparsity/linear_layer_sparsity": 0.8103723707687329, "compression/movement_sparsity/model_sparsity": 0.7825335868358181, "compression_loss": 97.66212463378906, "distillation_loss": 7.0512847900390625, "epoch": 1.55, "learning_rate": 3.891438232097573e-05, "loss": 103.8611, "step": 1836, "task_loss": 3.9132165908813477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.909795447553583, "compression/movement_sparsity/importance_threshold": -0.0004133958890791328, "compression/movement_sparsity/linear_layer_sparsity": 0.8106689009695016, "compression/movement_sparsity/model_sparsity": 0.7828199303119567, "compression_loss": 97.71627807617188, "distillation_loss": 5.081141471862793, "epoch": 1.55, "learning_rate": 3.89083444028499e-05, "loss": 102.5006, "step": 1837, "task_loss": 1.7006652355194092 }, { "compression/movement_sparsity/importance_regularization_factor": 0.910304118640246, "compression/movement_sparsity/importance_threshold": -0.00041106471476013394, "compression/movement_sparsity/linear_layer_sparsity": 0.8113741558643247, "compression/movement_sparsity/model_sparsity": 0.7835009575315248, "compression_loss": 97.77022552490234, "distillation_loss": 6.750747203826904, "epoch": 1.55, "learning_rate": 3.890230648472407e-05, "loss": 103.2681, "step": 1838, "task_loss": 3.981041669845581 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9108108738305465, "compression/movement_sparsity/importance_threshold": -0.00040874232074832213, "compression/movement_sparsity/linear_layer_sparsity": 0.8119041254949007, "compression/movement_sparsity/model_sparsity": 0.7840127210749133, "compression_loss": 97.82398986816406, "distillation_loss": 7.843149662017822, "epoch": 1.55, "learning_rate": 3.8896268566598236e-05, "loss": 103.4702, "step": 1839, "task_loss": 4.059089183807373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9113157167393836, "compression/movement_sparsity/importance_threshold": -0.00040642869047707864, "compression/movement_sparsity/linear_layer_sparsity": 0.8123333239847859, "compression/movement_sparsity/model_sparsity": 0.7844271752763019, "compression_loss": 97.8775405883789, "distillation_loss": 5.516822338104248, "epoch": 1.56, "learning_rate": 3.889023064847241e-05, "loss": 102.6542, "step": 1840, "task_loss": 3.025583267211914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9118186509816562, "compression/movement_sparsity/importance_threshold": -0.0004041238073797856, "compression/movement_sparsity/linear_layer_sparsity": 0.8128336620587867, "compression/movement_sparsity/model_sparsity": 0.7849103251982409, "compression_loss": 97.93094635009766, "distillation_loss": 6.51788330078125, "epoch": 1.56, "learning_rate": 3.888419273034658e-05, "loss": 103.6652, "step": 1841, "task_loss": 3.551455020904541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9123196801722627, "compression/movement_sparsity/importance_threshold": -0.0004018276548898277, "compression/movement_sparsity/linear_layer_sparsity": 0.8133253551112515, "compression/movement_sparsity/model_sparsity": 0.7853851270817288, "compression_loss": 97.98408508300781, "distillation_loss": 6.16136360168457, "epoch": 1.56, "learning_rate": 3.8878154812220744e-05, "loss": 104.1841, "step": 1842, "task_loss": 3.2871904373168945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9128188079261023, "compression/movement_sparsity/importance_threshold": -0.00039954021644058625, "compression/movement_sparsity/linear_layer_sparsity": 0.8137638902243957, "compression/movement_sparsity/model_sparsity": 0.7858085971646446, "compression_loss": 98.03705596923828, "distillation_loss": 5.868343353271484, "epoch": 1.56, "learning_rate": 3.887211689409492e-05, "loss": 102.9572, "step": 1843, "task_loss": 1.4100096225738525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9133160378580736, "compression/movement_sparsity/importance_threshold": -0.0003972614754654434, "compression/movement_sparsity/linear_layer_sparsity": 0.8140858665988993, "compression/movement_sparsity/model_sparsity": 0.7861195126601688, "compression_loss": 98.08977508544922, "distillation_loss": 5.980761528015137, "epoch": 1.56, "learning_rate": 3.886607897596909e-05, "loss": 103.8481, "step": 1844, "task_loss": 2.4828414916992188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9138113735830753, "compression/movement_sparsity/importance_threshold": -0.0003949914153977838, "compression/movement_sparsity/linear_layer_sparsity": 0.8144787559983333, "compression/movement_sparsity/model_sparsity": 0.7864989051000632, "compression_loss": 98.14227294921875, "distillation_loss": 4.772604942321777, "epoch": 1.56, "learning_rate": 3.886004105784325e-05, "loss": 103.2206, "step": 1845, "task_loss": 3.561566114425659 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9143048187160064, "compression/movement_sparsity/importance_threshold": -0.0003927300196709879, "compression/movement_sparsity/linear_layer_sparsity": 0.8149324109560397, "compression/movement_sparsity/model_sparsity": 0.7869369756143664, "compression_loss": 98.1946029663086, "distillation_loss": 5.977004528045654, "epoch": 1.56, "learning_rate": 3.8854003139717427e-05, "loss": 103.9529, "step": 1846, "task_loss": 2.1944656372070312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9147963768717657, "compression/movement_sparsity/importance_threshold": -0.0003904772717184396, "compression/movement_sparsity/linear_layer_sparsity": 0.8154193581897854, "compression/movement_sparsity/model_sparsity": 0.7874071947126081, "compression_loss": 98.24673461914062, "distillation_loss": 6.082143306732178, "epoch": 1.56, "learning_rate": 3.88479652215916e-05, "loss": 103.8563, "step": 1847, "task_loss": 3.212897777557373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9152860516652519, "compression/movement_sparsity/importance_threshold": -0.000388233154973521, "compression/movement_sparsity/linear_layer_sparsity": 0.8158632949508686, "compression/movement_sparsity/model_sparsity": 0.7878358808802388, "compression_loss": 98.29867553710938, "distillation_loss": 6.68058967590332, "epoch": 1.56, "learning_rate": 3.884192730346577e-05, "loss": 103.5036, "step": 1848, "task_loss": 2.6763534545898438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9157738467113636, "compression/movement_sparsity/importance_threshold": -0.0003859976528696168, "compression/movement_sparsity/linear_layer_sparsity": 0.8163402378079678, "compression/movement_sparsity/model_sparsity": 0.7882964392829489, "compression_loss": 98.35039520263672, "distillation_loss": 4.855503082275391, "epoch": 1.56, "learning_rate": 3.8835889385339935e-05, "loss": 102.7795, "step": 1849, "task_loss": 2.8051412105560303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.916259765625, "compression/movement_sparsity/importance_threshold": -0.00038377074884010653, "compression/movement_sparsity/linear_layer_sparsity": 0.8168623494120718, "compression/movement_sparsity/model_sparsity": 0.7888006147472486, "compression_loss": 98.40192413330078, "distillation_loss": 6.279828071594238, "epoch": 1.56, "learning_rate": 3.882985146721411e-05, "loss": 103.2826, "step": 1850, "task_loss": 3.33174729347229 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9167438120210597, "compression/movement_sparsity/importance_threshold": -0.0003815524263183741, "compression/movement_sparsity/linear_layer_sparsity": 0.8173370505256554, "compression/movement_sparsity/model_sparsity": 0.7892590084172293, "compression_loss": 98.45327758789062, "distillation_loss": 5.2572784423828125, "epoch": 1.56, "learning_rate": 3.8823813549088276e-05, "loss": 103.2418, "step": 1851, "task_loss": 3.187087297439575 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9172259895144413, "compression/movement_sparsity/importance_threshold": -0.00037934266873780424, "compression/movement_sparsity/linear_layer_sparsity": 0.8177970491405442, "compression/movement_sparsity/model_sparsity": 0.7897032046645753, "compression_loss": 98.50434112548828, "distillation_loss": 5.639957427978516, "epoch": 1.57, "learning_rate": 3.881777563096244e-05, "loss": 103.0592, "step": 1852, "task_loss": 2.866384983062744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.917706301720044, "compression/movement_sparsity/importance_threshold": -0.00037714145953177645, "compression/movement_sparsity/linear_layer_sparsity": 0.8182511929891236, "compression/movement_sparsity/model_sparsity": 0.7901417472748462, "compression_loss": 98.55535125732422, "distillation_loss": 4.712489604949951, "epoch": 1.57, "learning_rate": 3.881173771283662e-05, "loss": 103.5007, "step": 1853, "task_loss": 2.946272850036621 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9181847522527664, "compression/movement_sparsity/importance_threshold": -0.00037494878213367375, "compression/movement_sparsity/linear_layer_sparsity": 0.8186043629861626, "compression/movement_sparsity/model_sparsity": 0.7904827847960089, "compression_loss": 98.60606384277344, "distillation_loss": 5.082883358001709, "epoch": 1.57, "learning_rate": 3.8805699794710784e-05, "loss": 103.9542, "step": 1854, "task_loss": 3.1790356636047363 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9186613447275073, "compression/movement_sparsity/importance_threshold": -0.00037276461997688085, "compression/movement_sparsity/linear_layer_sparsity": 0.8191223369040969, "compression/movement_sparsity/model_sparsity": 0.790982964716388, "compression_loss": 98.65664672851562, "distillation_loss": 5.334721565246582, "epoch": 1.57, "learning_rate": 3.879966187658495e-05, "loss": 104.4039, "step": 1855, "task_loss": 3.2545597553253174 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9191360827591653, "compression/movement_sparsity/importance_threshold": -0.0003705889564947799, "compression/movement_sparsity/linear_layer_sparsity": 0.81951492819934, "compression/movement_sparsity/model_sparsity": 0.7913620692928874, "compression_loss": 98.70698547363281, "distillation_loss": 3.8050832748413086, "epoch": 1.57, "learning_rate": 3.8793623958459125e-05, "loss": 103.2339, "step": 1856, "task_loss": 1.7775837182998657 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9196089699626395, "compression/movement_sparsity/importance_threshold": -0.00036842177512075216, "compression/movement_sparsity/linear_layer_sparsity": 0.8200153855150172, "compression/movement_sparsity/model_sparsity": 0.7918453343601842, "compression_loss": 98.75708770751953, "distillation_loss": 6.942870616912842, "epoch": 1.57, "learning_rate": 3.878758604033329e-05, "loss": 104.6433, "step": 1857, "task_loss": 3.26145601272583 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9200800099528286, "compression/movement_sparsity/importance_threshold": -0.0003662630592881815, "compression/movement_sparsity/linear_layer_sparsity": 0.8204100873879319, "compression/movement_sparsity/model_sparsity": 0.7922264770095194, "compression_loss": 98.80706787109375, "distillation_loss": 6.447836875915527, "epoch": 1.57, "learning_rate": 3.878154812220747e-05, "loss": 104.184, "step": 1858, "task_loss": 3.584217071533203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9205492063446312, "compression/movement_sparsity/importance_threshold": -0.0003641127924304509, "compression/movement_sparsity/linear_layer_sparsity": 0.8208686193302014, "compression/movement_sparsity/model_sparsity": 0.7926692569689626, "compression_loss": 98.85676574707031, "distillation_loss": 4.457553386688232, "epoch": 1.57, "learning_rate": 3.8775510204081634e-05, "loss": 104.0217, "step": 1859, "task_loss": 3.5561373233795166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9210165627529465, "compression/movement_sparsity/importance_threshold": -0.00036197095798094076, "compression/movement_sparsity/linear_layer_sparsity": 0.8213057593157321, "compression/movement_sparsity/model_sparsity": 0.7930913798511905, "compression_loss": 98.90633392333984, "distillation_loss": 5.567176818847656, "epoch": 1.57, "learning_rate": 3.876947228595581e-05, "loss": 104.752, "step": 1860, "task_loss": 4.12358283996582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9214820827926731, "compression/movement_sparsity/importance_threshold": -0.0003598375393730358, "compression/movement_sparsity/linear_layer_sparsity": 0.8217795541925755, "compression/movement_sparsity/model_sparsity": 0.7935488984164508, "compression_loss": 98.95571899414062, "distillation_loss": 4.741827964782715, "epoch": 1.57, "learning_rate": 3.8763434367829975e-05, "loss": 104.3962, "step": 1861, "task_loss": 1.9019246101379395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9219457700787095, "compression/movement_sparsity/importance_threshold": -0.00035771252004011996, "compression/movement_sparsity/linear_layer_sparsity": 0.8222263646780589, "compression/movement_sparsity/model_sparsity": 0.793980359587208, "compression_loss": 99.00482177734375, "distillation_loss": 6.058761119842529, "epoch": 1.57, "learning_rate": 3.875739644970414e-05, "loss": 104.4989, "step": 1862, "task_loss": 2.61719012260437 }, { "compression/movement_sparsity/importance_regularization_factor": 0.922407628225955, "compression/movement_sparsity/importance_threshold": -0.0003555958834155718, "compression/movement_sparsity/linear_layer_sparsity": 0.8226420173135098, "compression/movement_sparsity/model_sparsity": 0.7943817332759341, "compression_loss": 99.05381774902344, "distillation_loss": 5.216007232666016, "epoch": 1.57, "learning_rate": 3.8751358531578316e-05, "loss": 103.7992, "step": 1863, "task_loss": 2.5538148880004883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.922867660849308, "compression/movement_sparsity/importance_threshold": -0.00035348761293277787, "compression/movement_sparsity/linear_layer_sparsity": 0.8230852386245349, "compression/movement_sparsity/model_sparsity": 0.7948097285714172, "compression_loss": 99.10262298583984, "distillation_loss": 4.595757484436035, "epoch": 1.58, "learning_rate": 3.874532061345248e-05, "loss": 104.1822, "step": 1864, "task_loss": 3.164721965789795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9233258715636676, "compression/movement_sparsity/importance_threshold": -0.0003513876920251194, "compression/movement_sparsity/linear_layer_sparsity": 0.8236405948080076, "compression/movement_sparsity/model_sparsity": 0.7953460065615122, "compression_loss": 99.15121459960938, "distillation_loss": 5.235466480255127, "epoch": 1.58, "learning_rate": 3.873928269532665e-05, "loss": 104.6141, "step": 1865, "task_loss": 3.130291700363159 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9237822639839324, "compression/movement_sparsity/importance_threshold": -0.0003492961041259785, "compression/movement_sparsity/linear_layer_sparsity": 0.8240275936686294, "compression/movement_sparsity/model_sparsity": 0.795719710820724, "compression_loss": 99.1996078491211, "distillation_loss": 6.177048683166504, "epoch": 1.58, "learning_rate": 3.8733244777200824e-05, "loss": 104.1366, "step": 1866, "task_loss": 2.2680609226226807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9242368417250012, "compression/movement_sparsity/importance_threshold": -0.00034721283266873817, "compression/movement_sparsity/linear_layer_sparsity": 0.824487234558489, "compression/movement_sparsity/model_sparsity": 0.7961635616319962, "compression_loss": 99.24778747558594, "distillation_loss": 6.084298610687256, "epoch": 1.58, "learning_rate": 3.872720685907499e-05, "loss": 104.4489, "step": 1867, "task_loss": 2.633469581604004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9246896084017728, "compression/movement_sparsity/importance_threshold": -0.0003451378610867823, "compression/movement_sparsity/linear_layer_sparsity": 0.8250769919655913, "compression/movement_sparsity/model_sparsity": 0.7967330590578583, "compression_loss": 99.29580688476562, "distillation_loss": 5.572914123535156, "epoch": 1.58, "learning_rate": 3.8721168940949166e-05, "loss": 104.5987, "step": 1868, "task_loss": 2.128075122833252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9251405676291462, "compression/movement_sparsity/importance_threshold": -0.0003430711728134913, "compression/movement_sparsity/linear_layer_sparsity": 0.8254688916591115, "compression/movement_sparsity/model_sparsity": 0.7971114957912817, "compression_loss": 99.34359741210938, "distillation_loss": 6.683205604553223, "epoch": 1.58, "learning_rate": 3.871513102282333e-05, "loss": 104.7368, "step": 1869, "task_loss": 2.9196503162384033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9255897230220199, "compression/movement_sparsity/importance_threshold": -0.00034101275128224987, "compression/movement_sparsity/linear_layer_sparsity": 0.8260329167124555, "compression/movement_sparsity/model_sparsity": 0.7976561448488992, "compression_loss": 99.39125061035156, "distillation_loss": 4.974265098571777, "epoch": 1.58, "learning_rate": 3.87090931046975e-05, "loss": 104.7631, "step": 1870, "task_loss": 2.743682861328125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9260370781952929, "compression/movement_sparsity/importance_threshold": -0.0003389625799264393, "compression/movement_sparsity/linear_layer_sparsity": 0.826393992432637, "compression/movement_sparsity/model_sparsity": 0.7980048165072938, "compression_loss": 99.43866729736328, "distillation_loss": 5.291414260864258, "epoch": 1.58, "learning_rate": 3.8703055186571674e-05, "loss": 104.3856, "step": 1871, "task_loss": 2.9101314544677734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9264826367638639, "compression/movement_sparsity/importance_threshold": -0.0003369206421794434, "compression/movement_sparsity/linear_layer_sparsity": 0.8267979236113019, "compression/movement_sparsity/model_sparsity": 0.7983948714073339, "compression_loss": 99.48590087890625, "distillation_loss": 4.4817891120910645, "epoch": 1.58, "learning_rate": 3.869701726844584e-05, "loss": 104.2685, "step": 1872, "task_loss": 2.8886475563049316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9269264023426318, "compression/movement_sparsity/importance_threshold": -0.0003348869214746435, "compression/movement_sparsity/linear_layer_sparsity": 0.82722034917397, "compression/movement_sparsity/model_sparsity": 0.7988027853523912, "compression_loss": 99.53291320800781, "distillation_loss": 6.173472881317139, "epoch": 1.58, "learning_rate": 3.869097935032001e-05, "loss": 105.1196, "step": 1873, "task_loss": 3.5351691246032715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9273683785464952, "compression/movement_sparsity/importance_threshold": -0.00033286140124542344, "compression/movement_sparsity/linear_layer_sparsity": 0.8275438637660986, "compression/movement_sparsity/model_sparsity": 0.7991151862230329, "compression_loss": 99.57972717285156, "distillation_loss": 5.237267971038818, "epoch": 1.58, "learning_rate": 3.868494143219418e-05, "loss": 104.8299, "step": 1874, "task_loss": 2.890228509902954 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9278085689903532, "compression/movement_sparsity/importance_threshold": -0.0003308440649251645, "compression/movement_sparsity/linear_layer_sparsity": 0.8280418170065722, "compression/movement_sparsity/model_sparsity": 0.7995960332378129, "compression_loss": 99.62633514404297, "distillation_loss": 5.389639854431152, "epoch": 1.58, "learning_rate": 3.867890351406835e-05, "loss": 105.0787, "step": 1875, "task_loss": 3.483466148376465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9282469772891043, "compression/movement_sparsity/importance_threshold": -0.0003288348959472522, "compression/movement_sparsity/linear_layer_sparsity": 0.8284698707763645, "compression/movement_sparsity/model_sparsity": 0.8000093820437654, "compression_loss": 99.6727066040039, "distillation_loss": 5.651316165924072, "epoch": 1.59, "learning_rate": 3.867286559594252e-05, "loss": 104.9942, "step": 1876, "task_loss": 2.623163938522339 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9286836070576474, "compression/movement_sparsity/importance_threshold": -0.0003268338777450662, "compression/movement_sparsity/linear_layer_sparsity": 0.8288303025914937, "compression/movement_sparsity/model_sparsity": 0.800357431917227, "compression_loss": 99.71892547607422, "distillation_loss": 6.2425312995910645, "epoch": 1.59, "learning_rate": 3.866682767781669e-05, "loss": 105.7607, "step": 1877, "task_loss": 2.771519422531128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9291184619108814, "compression/movement_sparsity/importance_threshold": -0.00032484099375199027, "compression/movement_sparsity/linear_layer_sparsity": 0.829204745303595, "compression/movement_sparsity/model_sparsity": 0.8007190113702471, "compression_loss": 99.76502227783203, "distillation_loss": 4.476393699645996, "epoch": 1.59, "learning_rate": 3.8660789759690865e-05, "loss": 104.6788, "step": 1878, "task_loss": 2.412442922592163 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9295515454637049, "compression/movement_sparsity/importance_threshold": -0.00032285622740140744, "compression/movement_sparsity/linear_layer_sparsity": 0.8296514484715697, "compression/movement_sparsity/model_sparsity": 0.8011503689101822, "compression_loss": 99.81090545654297, "distillation_loss": 6.256775379180908, "epoch": 1.59, "learning_rate": 3.865475184156503e-05, "loss": 105.6109, "step": 1879, "task_loss": 3.593289852142334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9299828613310169, "compression/movement_sparsity/importance_threshold": -0.00032087956212669983, "compression/movement_sparsity/linear_layer_sparsity": 0.8300399497773138, "compression/movement_sparsity/model_sparsity": 0.8015255240009042, "compression_loss": 99.8565673828125, "distillation_loss": 5.608592987060547, "epoch": 1.59, "learning_rate": 3.86487139234392e-05, "loss": 105.4806, "step": 1880, "task_loss": 2.4908766746520996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.930412413127716, "compression/movement_sparsity/importance_threshold": -0.00031891098136125045, "compression/movement_sparsity/linear_layer_sparsity": 0.8304278310263408, "compression/movement_sparsity/model_sparsity": 0.8019000803357648, "compression_loss": 99.90208435058594, "distillation_loss": 5.178839683532715, "epoch": 1.59, "learning_rate": 3.864267600531337e-05, "loss": 104.9524, "step": 1881, "task_loss": 3.395655870437622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9308402044687012, "compression/movement_sparsity/importance_threshold": -0.0003169504685384423, "compression/movement_sparsity/linear_layer_sparsity": 0.8308079257939015, "compression/movement_sparsity/model_sparsity": 0.8022671176787516, "compression_loss": 99.94733428955078, "distillation_loss": 4.663600444793701, "epoch": 1.59, "learning_rate": 3.863663808718754e-05, "loss": 105.1991, "step": 1882, "task_loss": 1.5167597532272339 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9312662389688712, "compression/movement_sparsity/importance_threshold": -0.0003149980070916566, "compression/movement_sparsity/linear_layer_sparsity": 0.8312291827881413, "compression/movement_sparsity/model_sparsity": 0.8026739031993011, "compression_loss": 99.9924545288086, "distillation_loss": 5.557404518127441, "epoch": 1.59, "learning_rate": 3.863060016906171e-05, "loss": 104.9771, "step": 1883, "task_loss": 2.737915277481079 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9316905202431248, "compression/movement_sparsity/importance_threshold": -0.0003130535804542773, "compression/movement_sparsity/linear_layer_sparsity": 0.8315933945644111, "compression/movement_sparsity/model_sparsity": 0.8030256031806096, "compression_loss": 100.03730010986328, "distillation_loss": 5.830221652984619, "epoch": 1.59, "learning_rate": 3.862456225093588e-05, "loss": 105.6906, "step": 1884, "task_loss": 2.8700015544891357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9321130519063607, "compression/movement_sparsity/importance_threshold": -0.0003111171720596873, "compression/movement_sparsity/linear_layer_sparsity": 0.8318918684045044, "compression/movement_sparsity/model_sparsity": 0.8033138235260827, "compression_loss": 100.08203125, "distillation_loss": 4.320580959320068, "epoch": 1.59, "learning_rate": 3.861852433281005e-05, "loss": 105.6126, "step": 1885, "task_loss": 2.302692413330078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9325338375734779, "compression/movement_sparsity/importance_threshold": -0.00030918876534126796, "compression/movement_sparsity/linear_layer_sparsity": 0.8323495418067041, "compression/movement_sparsity/model_sparsity": 0.8037557744389487, "compression_loss": 100.12654876708984, "distillation_loss": 4.9025163650512695, "epoch": 1.59, "learning_rate": 3.8612486414684216e-05, "loss": 105.3926, "step": 1886, "task_loss": 2.771658182144165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.932952880859375, "compression/movement_sparsity/importance_threshold": -0.0003072683437324031, "compression/movement_sparsity/linear_layer_sparsity": 0.8326352090907565, "compression/movement_sparsity/model_sparsity": 0.8040316281729786, "compression_loss": 100.17088317871094, "distillation_loss": 5.695150375366211, "epoch": 1.59, "learning_rate": 3.860644849655839e-05, "loss": 105.4945, "step": 1887, "task_loss": 3.0422160625457764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9333701853789509, "compression/movement_sparsity/importance_threshold": -0.0003053558906664748, "compression/movement_sparsity/linear_layer_sparsity": 0.8332065675071965, "compression/movement_sparsity/model_sparsity": 0.8045833586701098, "compression_loss": 100.21504974365234, "distillation_loss": 7.198251724243164, "epoch": 1.6, "learning_rate": 3.8600410578432563e-05, "loss": 105.7727, "step": 1888, "task_loss": 3.4534101486206055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9337857547471043, "compression/movement_sparsity/importance_threshold": -0.00030345138957686615, "compression/movement_sparsity/linear_layer_sparsity": 0.8335849451946177, "compression/movement_sparsity/model_sparsity": 0.8049487379199421, "compression_loss": 100.25904846191406, "distillation_loss": 6.22447395324707, "epoch": 1.6, "learning_rate": 3.8594372660306724e-05, "loss": 105.5381, "step": 1889, "task_loss": 4.1725382804870605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9341995925787342, "compression/movement_sparsity/importance_threshold": -0.0003015548238969601, "compression/movement_sparsity/linear_layer_sparsity": 0.8339004944427655, "compression/movement_sparsity/model_sparsity": 0.8052534470806731, "compression_loss": 100.30284118652344, "distillation_loss": 5.533419132232666, "epoch": 1.6, "learning_rate": 3.85883347421809e-05, "loss": 106.1351, "step": 1890, "task_loss": 3.487635374069214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9346117024887393, "compression/movement_sparsity/importance_threshold": -0.0002996661770601379, "compression/movement_sparsity/linear_layer_sparsity": 0.8344449042403485, "compression/movement_sparsity/model_sparsity": 0.8057791547269086, "compression_loss": 100.34645080566406, "distillation_loss": 6.185797214508057, "epoch": 1.6, "learning_rate": 3.858229682405507e-05, "loss": 105.4381, "step": 1891, "task_loss": 3.6203243732452393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9350220880920183, "compression/movement_sparsity/importance_threshold": -0.00029778543249978345, "compression/movement_sparsity/linear_layer_sparsity": 0.8348014130043255, "compression/movement_sparsity/model_sparsity": 0.8061234163180938, "compression_loss": 100.38983917236328, "distillation_loss": 4.887860298156738, "epoch": 1.6, "learning_rate": 3.857625890592924e-05, "loss": 105.5618, "step": 1892, "task_loss": 2.213867664337158 }, { "compression/movement_sparsity/importance_regularization_factor": 0.93543075300347, "compression/movement_sparsity/importance_threshold": -0.0002959125736492797, "compression/movement_sparsity/linear_layer_sparsity": 0.8351417049003178, "compression/movement_sparsity/model_sparsity": 0.8064520181405984, "compression_loss": 100.43310546875, "distillation_loss": 5.383342266082764, "epoch": 1.6, "learning_rate": 3.8570220987803406e-05, "loss": 105.7436, "step": 1893, "task_loss": 2.931389808654785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9358377008379933, "compression/movement_sparsity/importance_threshold": -0.000294047583942008, "compression/movement_sparsity/linear_layer_sparsity": 0.8354903079411522, "compression/movement_sparsity/model_sparsity": 0.8067886455945519, "compression_loss": 100.47607421875, "distillation_loss": 6.354804039001465, "epoch": 1.6, "learning_rate": 3.856418306967758e-05, "loss": 105.3174, "step": 1894, "task_loss": 3.955373764038086 }, { "compression/movement_sparsity/importance_regularization_factor": 0.936242935210487, "compression/movement_sparsity/importance_threshold": -0.00029219044681135214, "compression/movement_sparsity/linear_layer_sparsity": 0.8359197091418873, "compression/movement_sparsity/model_sparsity": 0.807203295543049, "compression_loss": 100.51895904541016, "distillation_loss": 4.857506275177002, "epoch": 1.6, "learning_rate": 3.855814515155175e-05, "loss": 106.1741, "step": 1895, "task_loss": 3.2896347045898438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9366464597358498, "compression/movement_sparsity/importance_threshold": -0.00029034114569069427, "compression/movement_sparsity/linear_layer_sparsity": 0.8362563880150858, "compression/movement_sparsity/model_sparsity": 0.8075284084612079, "compression_loss": 100.56155395507812, "distillation_loss": 3.986485004425049, "epoch": 1.6, "learning_rate": 3.8552107233425914e-05, "loss": 105.2237, "step": 1896, "task_loss": 2.18231463432312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9370482780289806, "compression/movement_sparsity/importance_threshold": -0.0002884996640134174, "compression/movement_sparsity/linear_layer_sparsity": 0.8366288632395273, "compression/movement_sparsity/model_sparsity": 0.807888088015822, "compression_loss": 100.6039810180664, "distillation_loss": 6.138541221618652, "epoch": 1.6, "learning_rate": 3.854606931530009e-05, "loss": 105.6165, "step": 1897, "task_loss": 3.902263641357422 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9374483937047782, "compression/movement_sparsity/importance_threshold": -0.00028666598521290363, "compression/movement_sparsity/linear_layer_sparsity": 0.8370334860199151, "compression/movement_sparsity/model_sparsity": 0.8082788107589381, "compression_loss": 100.64627075195312, "distillation_loss": 6.246413707733154, "epoch": 1.6, "learning_rate": 3.8540031397174256e-05, "loss": 105.5739, "step": 1898, "task_loss": 2.4191155433654785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9378468103781411, "compression/movement_sparsity/importance_threshold": -0.000284840092722536, "compression/movement_sparsity/linear_layer_sparsity": 0.8373624738049885, "compression/movement_sparsity/model_sparsity": 0.8085964968015095, "compression_loss": 100.68833923339844, "distillation_loss": 4.305167198181152, "epoch": 1.6, "learning_rate": 3.853399347904842e-05, "loss": 106.3612, "step": 1899, "task_loss": 1.5011531114578247 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9382435316639686, "compression/movement_sparsity/importance_threshold": -0.00028302196997569747, "compression/movement_sparsity/linear_layer_sparsity": 0.8377890728263292, "compression/movement_sparsity/model_sparsity": 0.809008440834095, "compression_loss": 100.73023223876953, "distillation_loss": 4.841150283813477, "epoch": 1.61, "learning_rate": 3.85279555609226e-05, "loss": 105.261, "step": 1900, "task_loss": 2.80690598487854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9386385611771592, "compression/movement_sparsity/importance_threshold": -0.0002812116004057702, "compression/movement_sparsity/linear_layer_sparsity": 0.8382182236195439, "compression/movement_sparsity/model_sparsity": 0.8094228489773405, "compression_loss": 100.77194213867188, "distillation_loss": 6.077312469482422, "epoch": 1.61, "learning_rate": 3.852191764279677e-05, "loss": 106.3862, "step": 1901, "task_loss": 4.14441442489624 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9390319025326116, "compression/movement_sparsity/importance_threshold": -0.0002794089674461372, "compression/movement_sparsity/linear_layer_sparsity": 0.8384479069365459, "compression/movement_sparsity/model_sparsity": 0.809644641965815, "compression_loss": 100.81343078613281, "distillation_loss": 6.960179328918457, "epoch": 1.61, "learning_rate": 3.851587972467093e-05, "loss": 106.5601, "step": 1902, "task_loss": 2.959937572479248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9394235593452248, "compression/movement_sparsity/importance_threshold": -0.00027761405453018053, "compression/movement_sparsity/linear_layer_sparsity": 0.8389279500772304, "compression/movement_sparsity/model_sparsity": 0.8101081941478316, "compression_loss": 100.85475158691406, "distillation_loss": 5.766426086425781, "epoch": 1.61, "learning_rate": 3.8509841806545105e-05, "loss": 105.9637, "step": 1903, "task_loss": 3.5418336391448975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9398135352298975, "compression/movement_sparsity/importance_threshold": -0.00027582684509128413, "compression/movement_sparsity/linear_layer_sparsity": 0.8394401646221966, "compression/movement_sparsity/model_sparsity": 0.810602812547422, "compression_loss": 100.8958511352539, "distillation_loss": 5.975924015045166, "epoch": 1.61, "learning_rate": 3.850380388841928e-05, "loss": 106.0186, "step": 1904, "task_loss": 3.1926465034484863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9402018338015287, "compression/movement_sparsity/importance_threshold": -0.00027404732256282836, "compression/movement_sparsity/linear_layer_sparsity": 0.8399846101922824, "compression/movement_sparsity/model_sparsity": 0.8111285547372649, "compression_loss": 100.93680572509766, "distillation_loss": 5.874066352844238, "epoch": 1.61, "learning_rate": 3.849776597029344e-05, "loss": 105.9865, "step": 1905, "task_loss": 3.7388508319854736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9405884586750167, "compression/movement_sparsity/importance_threshold": -0.00027227547037819884, "compression/movement_sparsity/linear_layer_sparsity": 0.8402955447875552, "compression/movement_sparsity/model_sparsity": 0.8114288077726433, "compression_loss": 100.97761535644531, "distillation_loss": 5.25878381729126, "epoch": 1.61, "learning_rate": 3.8491728052167613e-05, "loss": 105.6841, "step": 1906, "task_loss": 2.6373636722564697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9409734134652609, "compression/movement_sparsity/importance_threshold": -0.0002705112719707768, "compression/movement_sparsity/linear_layer_sparsity": 0.8407583694301736, "compression/movement_sparsity/model_sparsity": 0.8118757329649726, "compression_loss": 101.01817321777344, "distillation_loss": 6.527778625488281, "epoch": 1.61, "learning_rate": 3.848569013404179e-05, "loss": 105.9816, "step": 1907, "task_loss": 2.381190061569214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9413567017871597, "compression/movement_sparsity/importance_threshold": -0.00026875471077394356, "compression/movement_sparsity/linear_layer_sparsity": 0.8411881641284407, "compression/movement_sparsity/model_sparsity": 0.812290762893151, "compression_loss": 101.05860900878906, "distillation_loss": 7.208245277404785, "epoch": 1.61, "learning_rate": 3.8479652215915955e-05, "loss": 106.7493, "step": 1908, "task_loss": 3.331112861633301 }, { "compression/movement_sparsity/importance_regularization_factor": 0.941738327255612, "compression/movement_sparsity/importance_threshold": -0.0002670057702210847, "compression/movement_sparsity/linear_layer_sparsity": 0.8415353243449911, "compression/movement_sparsity/model_sparsity": 0.8126259970882733, "compression_loss": 101.09882354736328, "distillation_loss": 5.543517589569092, "epoch": 1.61, "learning_rate": 3.847361429779012e-05, "loss": 106.5562, "step": 1909, "task_loss": 2.416193723678589 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9421182934855167, "compression/movement_sparsity/importance_threshold": -0.0002652644337455797, "compression/movement_sparsity/linear_layer_sparsity": 0.8417603095399446, "compression/movement_sparsity/model_sparsity": 0.8128432533496447, "compression_loss": 101.13890838623047, "distillation_loss": 6.003857612609863, "epoch": 1.61, "learning_rate": 3.8467576379664296e-05, "loss": 106.5098, "step": 1910, "task_loss": 3.1569244861602783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9424966040917724, "compression/movement_sparsity/importance_threshold": -0.0002635306847808142, "compression/movement_sparsity/linear_layer_sparsity": 0.8421579805306006, "compression/movement_sparsity/model_sparsity": 0.8132272631183926, "compression_loss": 101.17877197265625, "distillation_loss": 6.6615142822265625, "epoch": 1.61, "learning_rate": 3.846153846153846e-05, "loss": 107.3841, "step": 1911, "task_loss": 3.3205208778381348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9428732626892781, "compression/movement_sparsity/importance_threshold": -0.0002618045067601686, "compression/movement_sparsity/linear_layer_sparsity": 0.8424899732059183, "compression/movement_sparsity/model_sparsity": 0.8135478508239842, "compression_loss": 101.2184829711914, "distillation_loss": 5.634515762329102, "epoch": 1.62, "learning_rate": 3.845550054341263e-05, "loss": 106.414, "step": 1912, "task_loss": 3.141801118850708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9432482728929323, "compression/movement_sparsity/importance_threshold": -0.0002600858831170276, "compression/movement_sparsity/linear_layer_sparsity": 0.8427674366626372, "compression/movement_sparsity/model_sparsity": 0.8138157825573874, "compression_loss": 101.25806427001953, "distillation_loss": 5.967013359069824, "epoch": 1.62, "learning_rate": 3.8449462625286804e-05, "loss": 107.0152, "step": 1913, "task_loss": 3.0385961532592773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9436216383176342, "compression/movement_sparsity/importance_threshold": -0.00025837479728477164, "compression/movement_sparsity/linear_layer_sparsity": 0.8431775684084728, "compression/movement_sparsity/model_sparsity": 0.8142118250160406, "compression_loss": 101.29745483398438, "distillation_loss": 5.186330795288086, "epoch": 1.62, "learning_rate": 3.844342470716097e-05, "loss": 106.6169, "step": 1914, "task_loss": 2.8391292095184326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9439933625782824, "compression/movement_sparsity/importance_threshold": -0.00025667123269678457, "compression/movement_sparsity/linear_layer_sparsity": 0.8434832205974828, "compression/movement_sparsity/model_sparsity": 0.8145069771120621, "compression_loss": 101.336669921875, "distillation_loss": 6.493569374084473, "epoch": 1.62, "learning_rate": 3.843738678903514e-05, "loss": 106.9997, "step": 1915, "task_loss": 3.2028591632843018 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9443634492897756, "compression/movement_sparsity/importance_threshold": -0.0002549751727864494, "compression/movement_sparsity/linear_layer_sparsity": 0.8436710262377477, "compression/movement_sparsity/model_sparsity": 0.814688331050826, "compression_loss": 101.37574005126953, "distillation_loss": 5.533164024353027, "epoch": 1.62, "learning_rate": 3.843134887090931e-05, "loss": 106.4385, "step": 1916, "task_loss": 2.5165042877197266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9447319020670126, "compression/movement_sparsity/importance_threshold": -0.0002532866009871482, "compression/movement_sparsity/linear_layer_sparsity": 0.8439543563849434, "compression/movement_sparsity/model_sparsity": 0.8149619279358402, "compression_loss": 101.41458129882812, "distillation_loss": 5.6729583740234375, "epoch": 1.62, "learning_rate": 3.8425310952783486e-05, "loss": 107.1203, "step": 1917, "task_loss": 2.7738327980041504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9450987245248924, "compression/movement_sparsity/importance_threshold": -0.00025160550073226404, "compression/movement_sparsity/linear_layer_sparsity": 0.8443372891044015, "compression/movement_sparsity/model_sparsity": 0.815331705738346, "compression_loss": 101.45332336425781, "distillation_loss": 5.163906574249268, "epoch": 1.62, "learning_rate": 3.8419273034657653e-05, "loss": 106.8631, "step": 1918, "task_loss": 2.3977153301239014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9454639202783136, "compression/movement_sparsity/importance_threshold": -0.00024993185545517903, "compression/movement_sparsity/linear_layer_sparsity": 0.844673383693386, "compression/movement_sparsity/model_sparsity": 0.8156562544442509, "compression_loss": 101.49183654785156, "distillation_loss": 6.869274616241455, "epoch": 1.62, "learning_rate": 3.841323511653182e-05, "loss": 108.4332, "step": 1919, "task_loss": 2.6736629009246826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9458274929421752, "compression/movement_sparsity/importance_threshold": -0.00024826564858927615, "compression/movement_sparsity/linear_layer_sparsity": 0.8448627394754434, "compression/movement_sparsity/model_sparsity": 0.8158391052726683, "compression_loss": 101.53022003173828, "distillation_loss": 5.624550819396973, "epoch": 1.62, "learning_rate": 3.8407197198405995e-05, "loss": 106.6934, "step": 1920, "task_loss": 2.427751064300537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9461894461313758, "compression/movement_sparsity/importance_threshold": -0.0002466068635679384, "compression/movement_sparsity/linear_layer_sparsity": 0.84534625254891, "compression/movement_sparsity/model_sparsity": 0.8163060081846011, "compression_loss": 101.56845092773438, "distillation_loss": 4.345903396606445, "epoch": 1.62, "learning_rate": 3.840115928028016e-05, "loss": 106.6486, "step": 1921, "task_loss": 3.194505453109741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9465497834608143, "compression/movement_sparsity/importance_threshold": -0.00024495548382454796, "compression/movement_sparsity/linear_layer_sparsity": 0.8457369836740021, "compression/movement_sparsity/model_sparsity": 0.8166833164935167, "compression_loss": 101.60649871826172, "distillation_loss": 4.8123650550842285, "epoch": 1.62, "learning_rate": 3.839512136215433e-05, "loss": 107.0029, "step": 1922, "task_loss": 2.016848087310791 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9469085085453894, "compression/movement_sparsity/importance_threshold": -0.0002433114927924869, "compression/movement_sparsity/linear_layer_sparsity": 0.8460168438884157, "compression/movement_sparsity/model_sparsity": 0.8169535626486145, "compression_loss": 101.64433288574219, "distillation_loss": 4.795164585113525, "epoch": 1.63, "learning_rate": 3.83890834440285e-05, "loss": 106.1464, "step": 1923, "task_loss": 2.3285000324249268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.947265625, "compression/movement_sparsity/importance_threshold": -0.00024167487390513998, "compression/movement_sparsity/linear_layer_sparsity": 0.8463091648580089, "compression/movement_sparsity/model_sparsity": 0.8172358414936177, "compression_loss": 101.68196105957031, "distillation_loss": 5.377124786376953, "epoch": 1.63, "learning_rate": 3.838304552590267e-05, "loss": 107.2051, "step": 1924, "task_loss": 2.396437406539917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9476211364395448, "compression/movement_sparsity/importance_threshold": -0.00024004561059588845, "compression/movement_sparsity/linear_layer_sparsity": 0.8466682492421953, "compression/movement_sparsity/model_sparsity": 0.8175825902245345, "compression_loss": 101.71945190429688, "distillation_loss": 6.381112098693848, "epoch": 1.63, "learning_rate": 3.837700760777684e-05, "loss": 107.5373, "step": 1925, "task_loss": 3.424048662185669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9479750464789227, "compression/movement_sparsity/importance_threshold": -0.00023842368629811445, "compression/movement_sparsity/linear_layer_sparsity": 0.8470521835917348, "compression/movement_sparsity/model_sparsity": 0.8179533352480471, "compression_loss": 101.75677490234375, "distillation_loss": 6.1295905113220215, "epoch": 1.63, "learning_rate": 3.837096968965101e-05, "loss": 107.3591, "step": 1926, "task_loss": 2.9069697856903076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9483273587330325, "compression/movement_sparsity/importance_threshold": -0.00023680908444520098, "compression/movement_sparsity/linear_layer_sparsity": 0.8472765010333007, "compression/movement_sparsity/model_sparsity": 0.818169946695414, "compression_loss": 101.79398345947266, "distillation_loss": 3.9729080200195312, "epoch": 1.63, "learning_rate": 3.836493177152518e-05, "loss": 106.3959, "step": 1927, "task_loss": 1.6243078708648682 }, { "compression/movement_sparsity/importance_regularization_factor": 0.948678076816773, "compression/movement_sparsity/importance_threshold": -0.00023520178847053104, "compression/movement_sparsity/linear_layer_sparsity": 0.8475928492006801, "compression/movement_sparsity/model_sparsity": 0.8184754273300432, "compression_loss": 101.83099365234375, "distillation_loss": 4.306410789489746, "epoch": 1.63, "learning_rate": 3.835889385339935e-05, "loss": 106.6834, "step": 1928, "task_loss": 3.405989646911621 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9490272043450428, "compression/movement_sparsity/importance_threshold": -0.00023360178180748762, "compression/movement_sparsity/linear_layer_sparsity": 0.8478480263880875, "compression/movement_sparsity/model_sparsity": 0.8187218383960464, "compression_loss": 101.8677749633789, "distillation_loss": 5.650971412658691, "epoch": 1.63, "learning_rate": 3.835285593527352e-05, "loss": 107.7888, "step": 1929, "task_loss": 2.607145309448242 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9493747449327409, "compression/movement_sparsity/importance_threshold": -0.00023200904788945372, "compression/movement_sparsity/linear_layer_sparsity": 0.8483280695287722, "compression/movement_sparsity/model_sparsity": 0.8191853905780629, "compression_loss": 101.90441131591797, "distillation_loss": 3.9981789588928223, "epoch": 1.63, "learning_rate": 3.834681801714769e-05, "loss": 106.4288, "step": 1930, "task_loss": 3.032195806503296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.949720702194766, "compression/movement_sparsity/importance_threshold": -0.0002304235701498106, "compression/movement_sparsity/linear_layer_sparsity": 0.8486747647027849, "compression/movement_sparsity/model_sparsity": 0.8195201757062892, "compression_loss": 101.94091033935547, "distillation_loss": 4.763620376586914, "epoch": 1.63, "learning_rate": 3.834078009902186e-05, "loss": 106.9663, "step": 1931, "task_loss": 3.8943209648132324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9500650797460171, "compression/movement_sparsity/importance_threshold": -0.00022884533202194128, "compression/movement_sparsity/linear_layer_sparsity": 0.8489735127987338, "compression/movement_sparsity/model_sparsity": 0.8198086608860857, "compression_loss": 101.97721862792969, "distillation_loss": 6.821642875671387, "epoch": 1.63, "learning_rate": 3.833474218089603e-05, "loss": 107.4615, "step": 1932, "task_loss": 3.402501344680786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9504078812013926, "compression/movement_sparsity/importance_threshold": -0.00022727431693922873, "compression/movement_sparsity/linear_layer_sparsity": 0.8492519182646959, "compression/movement_sparsity/model_sparsity": 0.8200775022678166, "compression_loss": 102.0133285522461, "distillation_loss": 5.580167770385742, "epoch": 1.63, "learning_rate": 3.83287042627702e-05, "loss": 106.7296, "step": 1933, "task_loss": 2.5173094272613525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9507491101757917, "compression/movement_sparsity/importance_threshold": -0.00022571050833505683, "compression/movement_sparsity/linear_layer_sparsity": 0.849611503463923, "compression/movement_sparsity/model_sparsity": 0.8204247346092368, "compression_loss": 102.04922485351562, "distillation_loss": 6.53983211517334, "epoch": 1.63, "learning_rate": 3.832266634464437e-05, "loss": 107.4979, "step": 1934, "task_loss": 3.2253458499908447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.951088770284113, "compression/movement_sparsity/importance_threshold": -0.00022415388964280598, "compression/movement_sparsity/linear_layer_sparsity": 0.8498557223412732, "compression/movement_sparsity/model_sparsity": 0.8206605638168448, "compression_loss": 102.0849838256836, "distillation_loss": 6.080235481262207, "epoch": 1.64, "learning_rate": 3.8316628426518536e-05, "loss": 108.2923, "step": 1935, "task_loss": 3.2126784324645996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9514268651412552, "compression/movement_sparsity/importance_threshold": -0.00022260444429586003, "compression/movement_sparsity/linear_layer_sparsity": 0.8502520578251539, "compression/movement_sparsity/model_sparsity": 0.8210432839575837, "compression_loss": 102.12060546875, "distillation_loss": 7.040731430053711, "epoch": 1.64, "learning_rate": 3.831059050839271e-05, "loss": 107.8568, "step": 1936, "task_loss": 3.5568227767944336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9517633983621173, "compression/movement_sparsity/importance_threshold": -0.000221062155727602, "compression/movement_sparsity/linear_layer_sparsity": 0.8506569667855649, "compression/movement_sparsity/model_sparsity": 0.8214342830495589, "compression_loss": 102.15605926513672, "distillation_loss": 6.006886005401611, "epoch": 1.64, "learning_rate": 3.830455259026688e-05, "loss": 107.433, "step": 1937, "task_loss": 2.5286214351654053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.952098373561598, "compression/movement_sparsity/importance_threshold": -0.00021952700737141399, "compression/movement_sparsity/linear_layer_sparsity": 0.8508389653561911, "compression/movement_sparsity/model_sparsity": 0.821610029409391, "compression_loss": 102.19135284423828, "distillation_loss": 5.9427056312561035, "epoch": 1.64, "learning_rate": 3.829851467214105e-05, "loss": 107.0022, "step": 1938, "task_loss": 1.745589017868042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9524317943545961, "compression/movement_sparsity/importance_threshold": -0.00021799898266067902, "compression/movement_sparsity/linear_layer_sparsity": 0.8512765942325949, "compression/movement_sparsity/model_sparsity": 0.8220326243875865, "compression_loss": 102.22645568847656, "distillation_loss": 6.120401859283447, "epoch": 1.64, "learning_rate": 3.829247675401522e-05, "loss": 108.2287, "step": 1939, "task_loss": 2.5676510334014893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9527636643560105, "compression/movement_sparsity/importance_threshold": -0.00021647806502877922, "compression/movement_sparsity/linear_layer_sparsity": 0.8517026447422243, "compression/movement_sparsity/model_sparsity": 0.8224440387515254, "compression_loss": 102.2613525390625, "distillation_loss": 6.05389404296875, "epoch": 1.64, "learning_rate": 3.8286438835889386e-05, "loss": 107.6082, "step": 1940, "task_loss": 3.6275060176849365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9530939871807399, "compression/movement_sparsity/importance_threshold": -0.00021496423790909758, "compression/movement_sparsity/linear_layer_sparsity": 0.8519418912416703, "compression/movement_sparsity/model_sparsity": 0.822675066397707, "compression_loss": 102.29612731933594, "distillation_loss": 5.096152305603027, "epoch": 1.64, "learning_rate": 3.828040091776356e-05, "loss": 108.238, "step": 1941, "task_loss": 3.199683666229248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.953422766443683, "compression/movement_sparsity/importance_threshold": -0.0002134574847350171, "compression/movement_sparsity/linear_layer_sparsity": 0.85223216125443, "compression/movement_sparsity/model_sparsity": 0.8229553647425536, "compression_loss": 102.33072662353516, "distillation_loss": 6.219561576843262, "epoch": 1.64, "learning_rate": 3.827436299963773e-05, "loss": 107.9894, "step": 1942, "task_loss": 3.188004732131958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9537500057597388, "compression/movement_sparsity/importance_threshold": -0.00021195778893991993, "compression/movement_sparsity/linear_layer_sparsity": 0.8526211991477177, "compression/movement_sparsity/model_sparsity": 0.8233310379873863, "compression_loss": 102.36510467529297, "distillation_loss": 6.276364326477051, "epoch": 1.64, "learning_rate": 3.8268325081511894e-05, "loss": 107.749, "step": 1943, "task_loss": 2.485442876815796 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9540757087438059, "compression/movement_sparsity/importance_threshold": -0.00021046513395718905, "compression/movement_sparsity/linear_layer_sparsity": 0.8529834672846629, "compression/movement_sparsity/model_sparsity": 0.8236808610993602, "compression_loss": 102.39942169189453, "distillation_loss": 5.8469390869140625, "epoch": 1.64, "learning_rate": 3.826228716338607e-05, "loss": 108.4381, "step": 1944, "task_loss": 2.4769740104675293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9543998790107833, "compression/movement_sparsity/importance_threshold": -0.00020897950322020659, "compression/movement_sparsity/linear_layer_sparsity": 0.8532753828325564, "compression/movement_sparsity/model_sparsity": 0.8239627484501464, "compression_loss": 102.43356323242188, "distillation_loss": 6.050718784332275, "epoch": 1.64, "learning_rate": 3.8256249245260235e-05, "loss": 107.8398, "step": 1945, "task_loss": 2.8765108585357666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9547225201755696, "compression/movement_sparsity/importance_threshold": -0.0002075008801623564, "compression/movement_sparsity/linear_layer_sparsity": 0.8536249398068018, "compression/movement_sparsity/model_sparsity": 0.8243002970669633, "compression_loss": 102.4675064086914, "distillation_loss": 6.006316184997559, "epoch": 1.64, "learning_rate": 3.82502113271344e-05, "loss": 108.2434, "step": 1946, "task_loss": 4.176956653594971 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9550436358530636, "compression/movement_sparsity/importance_threshold": -0.0002060292482170198, "compression/movement_sparsity/linear_layer_sparsity": 0.8539569563304548, "compression/movement_sparsity/model_sparsity": 0.8246209078016266, "compression_loss": 102.50137329101562, "distillation_loss": 6.109163284301758, "epoch": 1.65, "learning_rate": 3.8244173409008576e-05, "loss": 107.8848, "step": 1947, "task_loss": 2.258094072341919 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9553632296581643, "compression/movement_sparsity/importance_threshold": -0.00020456459081757972, "compression/movement_sparsity/linear_layer_sparsity": 0.8541746320106475, "compression/movement_sparsity/model_sparsity": 0.8248311056525559, "compression_loss": 102.53490447998047, "distillation_loss": 6.7761664390563965, "epoch": 1.65, "learning_rate": 3.8238135490882743e-05, "loss": 108.3591, "step": 1948, "task_loss": 3.7448716163635254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9556813052057704, "compression/movement_sparsity/importance_threshold": -0.00020310689139742006, "compression/movement_sparsity/linear_layer_sparsity": 0.8545277662351836, "compression/movement_sparsity/model_sparsity": 0.8251721086301111, "compression_loss": 102.568359375, "distillation_loss": 6.108149528503418, "epoch": 1.65, "learning_rate": 3.823209757275692e-05, "loss": 108.7619, "step": 1949, "task_loss": 2.715660333633423 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9559978661107806, "compression/movement_sparsity/importance_threshold": -0.00020165613338992208, "compression/movement_sparsity/linear_layer_sparsity": 0.8548684158562049, "compression/movement_sparsity/model_sparsity": 0.8255010558886896, "compression_loss": 102.60164642333984, "distillation_loss": 8.210382461547852, "epoch": 1.65, "learning_rate": 3.8226059654631085e-05, "loss": 109.1441, "step": 1950, "task_loss": 3.2184159755706787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.956312915988094, "compression/movement_sparsity/importance_threshold": -0.00020021230022846878, "compression/movement_sparsity/linear_layer_sparsity": 0.855302562875659, "compression/movement_sparsity/model_sparsity": 0.825920288622433, "compression_loss": 102.63478088378906, "distillation_loss": 4.681859016418457, "epoch": 1.65, "learning_rate": 3.822002173650526e-05, "loss": 107.7917, "step": 1951, "task_loss": 1.857974648475647 }, { "compression/movement_sparsity/importance_regularization_factor": 0.956626458452609, "compression/movement_sparsity/importance_threshold": -0.00019877537534644316, "compression/movement_sparsity/linear_layer_sparsity": 0.8554288755834256, "compression/movement_sparsity/model_sparsity": 0.8260422621001047, "compression_loss": 102.667724609375, "distillation_loss": 6.685154438018799, "epoch": 1.65, "learning_rate": 3.8213983818379426e-05, "loss": 107.6651, "step": 1952, "task_loss": 2.4582998752593994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9569384971192245, "compression/movement_sparsity/importance_threshold": -0.00019734534217722734, "compression/movement_sparsity/linear_layer_sparsity": 0.855709498944568, "compression/movement_sparsity/model_sparsity": 0.8263132451854933, "compression_loss": 102.70050811767578, "distillation_loss": 6.30096435546875, "epoch": 1.65, "learning_rate": 3.820794590025359e-05, "loss": 108.3714, "step": 1953, "task_loss": 3.50051212310791 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9572490356028394, "compression/movement_sparsity/importance_threshold": -0.0001959221841542052, "compression/movement_sparsity/linear_layer_sparsity": 0.855944750847856, "compression/movement_sparsity/model_sparsity": 0.8265404154621838, "compression_loss": 102.73308563232422, "distillation_loss": 5.181475639343262, "epoch": 1.65, "learning_rate": 3.820190798212777e-05, "loss": 108.4525, "step": 1954, "task_loss": 2.4829981327056885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9575580775183525, "compression/movement_sparsity/importance_threshold": -0.00019450588471075798, "compression/movement_sparsity/linear_layer_sparsity": 0.8562039464797567, "compression/movement_sparsity/model_sparsity": 0.8267907069267498, "compression_loss": 102.76554870605469, "distillation_loss": 4.995074272155762, "epoch": 1.65, "learning_rate": 3.8195870064001934e-05, "loss": 108.2452, "step": 1955, "task_loss": 2.606818199157715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9578656264806625, "compression/movement_sparsity/importance_threshold": -0.00019309642728026957, "compression/movement_sparsity/linear_layer_sparsity": 0.8564849394900959, "compression/movement_sparsity/model_sparsity": 0.8270620469627481, "compression_loss": 102.7978515625, "distillation_loss": 6.647708892822266, "epoch": 1.65, "learning_rate": 3.81898321458761e-05, "loss": 108.1532, "step": 1956, "task_loss": 2.6218669414520264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9581716861046683, "compression/movement_sparsity/importance_threshold": -0.00019169379529612122, "compression/movement_sparsity/linear_layer_sparsity": 0.8567724073234613, "compression/movement_sparsity/model_sparsity": 0.827339639391683, "compression_loss": 102.82999420166016, "distillation_loss": 5.546730041503906, "epoch": 1.65, "learning_rate": 3.8183794227750275e-05, "loss": 108.2147, "step": 1957, "task_loss": 2.647742509841919 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9584762600052686, "compression/movement_sparsity/importance_threshold": -0.0001902979721916968, "compression/movement_sparsity/linear_layer_sparsity": 0.8570457688665134, "compression/movement_sparsity/model_sparsity": 0.8276036101247728, "compression_loss": 102.86198425292969, "distillation_loss": 6.408062934875488, "epoch": 1.65, "learning_rate": 3.817775630962444e-05, "loss": 108.9942, "step": 1958, "task_loss": 2.989100217819214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9587793517973624, "compression/movement_sparsity/importance_threshold": -0.00018890894140037844, "compression/movement_sparsity/linear_layer_sparsity": 0.8572989308695904, "compression/movement_sparsity/model_sparsity": 0.8278480752342267, "compression_loss": 102.89374542236328, "distillation_loss": 6.032017230987549, "epoch": 1.66, "learning_rate": 3.817171839149861e-05, "loss": 108.3911, "step": 1959, "task_loss": 2.662623167037964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9590809650958482, "compression/movement_sparsity/importance_threshold": -0.00018752668635554914, "compression/movement_sparsity/linear_layer_sparsity": 0.8575005685443129, "compression/movement_sparsity/model_sparsity": 0.8280427860345123, "compression_loss": 102.9253921508789, "distillation_loss": 6.726193428039551, "epoch": 1.66, "learning_rate": 3.8165680473372784e-05, "loss": 108.6975, "step": 1960, "task_loss": 2.9843013286590576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.959381103515625, "compression/movement_sparsity/importance_threshold": -0.00018615119049059103, "compression/movement_sparsity/linear_layer_sparsity": 0.8577571289351661, "compression/movement_sparsity/model_sparsity": 0.8282905327866676, "compression_loss": 102.95684051513672, "distillation_loss": 5.5959320068359375, "epoch": 1.66, "learning_rate": 3.815964255524696e-05, "loss": 108.5168, "step": 1961, "task_loss": 2.7751340866088867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9596797706715915, "compression/movement_sparsity/importance_threshold": -0.0001847824372388871, "compression/movement_sparsity/linear_layer_sparsity": 0.857912023872756, "compression/movement_sparsity/model_sparsity": 0.8284401066066388, "compression_loss": 102.9881591796875, "distillation_loss": 4.423287868499756, "epoch": 1.66, "learning_rate": 3.815360463712112e-05, "loss": 107.7289, "step": 1962, "task_loss": 1.4729368686676025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9599769701786466, "compression/movement_sparsity/importance_threshold": -0.00018342041003382038, "compression/movement_sparsity/linear_layer_sparsity": 0.8581732704614901, "compression/movement_sparsity/model_sparsity": 0.8286923785713614, "compression_loss": 103.0193099975586, "distillation_loss": 4.258683204650879, "epoch": 1.66, "learning_rate": 3.814756671899529e-05, "loss": 107.5291, "step": 1963, "task_loss": 1.974421501159668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.960272705651689, "compression/movement_sparsity/importance_threshold": -0.00018206509230877296, "compression/movement_sparsity/linear_layer_sparsity": 0.858411276847502, "compression/movement_sparsity/model_sparsity": 0.8289222087058205, "compression_loss": 103.05030059814453, "distillation_loss": 5.709534645080566, "epoch": 1.66, "learning_rate": 3.8141528800869466e-05, "loss": 108.2681, "step": 1964, "task_loss": 3.69650936126709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9605669807056174, "compression/movement_sparsity/importance_threshold": -0.00018071646749712785, "compression/movement_sparsity/linear_layer_sparsity": 0.8586379195017567, "compression/movement_sparsity/model_sparsity": 0.8291410654876672, "compression_loss": 103.0811996459961, "distillation_loss": 6.430551052093506, "epoch": 1.66, "learning_rate": 3.813549088274363e-05, "loss": 108.9351, "step": 1965, "task_loss": 3.296149253845215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9608597989553309, "compression/movement_sparsity/importance_threshold": -0.0001793745190322672, "compression/movement_sparsity/linear_layer_sparsity": 0.8590599038702224, "compression/movement_sparsity/model_sparsity": 0.8295485533949003, "compression_loss": 103.11182403564453, "distillation_loss": 5.128777503967285, "epoch": 1.66, "learning_rate": 3.81294529646178e-05, "loss": 108.769, "step": 1966, "task_loss": 3.2883784770965576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.961151164015728, "compression/movement_sparsity/importance_threshold": -0.00017803923034757484, "compression/movement_sparsity/linear_layer_sparsity": 0.8594259877408109, "compression/movement_sparsity/model_sparsity": 0.8299020611583284, "compression_loss": 103.14239501953125, "distillation_loss": 6.642801761627197, "epoch": 1.66, "learning_rate": 3.8123415046491974e-05, "loss": 109.1039, "step": 1967, "task_loss": 3.4295120239257812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9614410795017077, "compression/movement_sparsity/importance_threshold": -0.00017671058487643294, "compression/movement_sparsity/linear_layer_sparsity": 0.8596079028422636, "compression/movement_sparsity/model_sparsity": 0.8300777269164099, "compression_loss": 103.17276763916016, "distillation_loss": 4.965295791625977, "epoch": 1.66, "learning_rate": 3.811737712836614e-05, "loss": 108.8985, "step": 1968, "task_loss": 2.6745333671569824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9617295490281687, "compression/movement_sparsity/importance_threshold": -0.00017538856605222274, "compression/movement_sparsity/linear_layer_sparsity": 0.8598598724285771, "compression/movement_sparsity/model_sparsity": 0.8303210405722844, "compression_loss": 103.20299530029297, "distillation_loss": 6.368554592132568, "epoch": 1.66, "learning_rate": 3.811133921024031e-05, "loss": 108.5951, "step": 1969, "task_loss": 3.027096748352051 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9620165762100098, "compression/movement_sparsity/importance_threshold": -0.0001740731573083281, "compression/movement_sparsity/linear_layer_sparsity": 0.8600730407734034, "compression/movement_sparsity/model_sparsity": 0.8305268859286834, "compression_loss": 103.23311614990234, "distillation_loss": 6.641411304473877, "epoch": 1.66, "learning_rate": 3.810530129211448e-05, "loss": 108.4539, "step": 1970, "task_loss": 4.101413726806641 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9623021646621298, "compression/movement_sparsity/importance_threshold": -0.00017276434207813204, "compression/movement_sparsity/linear_layer_sparsity": 0.8603355393997393, "compression/movement_sparsity/model_sparsity": 0.8307803669196644, "compression_loss": 103.26300048828125, "distillation_loss": 5.485191822052002, "epoch": 1.67, "learning_rate": 3.809926337398865e-05, "loss": 109.5694, "step": 1971, "task_loss": 3.2495362758636475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9625863179994275, "compression/movement_sparsity/importance_threshold": -0.00017146210379501755, "compression/movement_sparsity/linear_layer_sparsity": 0.8606749846798294, "compression/movement_sparsity/model_sparsity": 0.8311081512101276, "compression_loss": 103.29280090332031, "distillation_loss": 3.6900763511657715, "epoch": 1.67, "learning_rate": 3.809322545586282e-05, "loss": 109.4049, "step": 1972, "task_loss": 2.036069631576538 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9628690398368016, "compression/movement_sparsity/importance_threshold": -0.00017016642589236589, "compression/movement_sparsity/linear_layer_sparsity": 0.8610028158206422, "compression/movement_sparsity/model_sparsity": 0.831424720342727, "compression_loss": 103.3224105834961, "distillation_loss": 5.363384246826172, "epoch": 1.67, "learning_rate": 3.808718753773699e-05, "loss": 108.6903, "step": 1973, "task_loss": 4.000527381896973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9631503337891512, "compression/movement_sparsity/importance_threshold": -0.00016887729180356092, "compression/movement_sparsity/linear_layer_sparsity": 0.8613122837432957, "compression/movement_sparsity/model_sparsity": 0.8317235570902026, "compression_loss": 103.35186004638672, "distillation_loss": 5.431252956390381, "epoch": 1.67, "learning_rate": 3.8081149619611165e-05, "loss": 109.6182, "step": 1974, "task_loss": 2.70221209526062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9634302034713748, "compression/movement_sparsity/importance_threshold": -0.0001675946849619839, "compression/movement_sparsity/linear_layer_sparsity": 0.8615632516995277, "compression/movement_sparsity/model_sparsity": 0.8319659035250704, "compression_loss": 103.3811264038086, "distillation_loss": 5.1192731857299805, "epoch": 1.67, "learning_rate": 3.8075111701485325e-05, "loss": 107.9119, "step": 1975, "task_loss": 2.93863582611084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9637086524983713, "compression/movement_sparsity/importance_threshold": -0.00016631858880101873, "compression/movement_sparsity/linear_layer_sparsity": 0.8617644601042154, "compression/movement_sparsity/model_sparsity": 0.8321601998020675, "compression_loss": 103.4102783203125, "distillation_loss": 7.60659122467041, "epoch": 1.67, "learning_rate": 3.80690737833595e-05, "loss": 108.9236, "step": 1976, "task_loss": 4.169926643371582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9639856844850394, "compression/movement_sparsity/importance_threshold": -0.00016504898675404837, "compression/movement_sparsity/linear_layer_sparsity": 0.8621096528331059, "compression/movement_sparsity/model_sparsity": 0.8324935340987836, "compression_loss": 103.43925476074219, "distillation_loss": 5.863859176635742, "epoch": 1.67, "learning_rate": 3.806303586523367e-05, "loss": 108.7849, "step": 1977, "task_loss": 3.101158618927002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9642613030462781, "compression/movement_sparsity/importance_threshold": -0.0001637858622544541, "compression/movement_sparsity/linear_layer_sparsity": 0.8622586691560513, "compression/movement_sparsity/model_sparsity": 0.8326374312526079, "compression_loss": 103.46808624267578, "distillation_loss": 5.456670761108398, "epoch": 1.67, "learning_rate": 3.8056997947107834e-05, "loss": 110.2452, "step": 1978, "task_loss": 2.6411280632019043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.964535511796986, "compression/movement_sparsity/importance_threshold": -0.00016252919873561977, "compression/movement_sparsity/linear_layer_sparsity": 0.8626279248552311, "compression/movement_sparsity/model_sparsity": 0.8329940018825576, "compression_loss": 103.49676513671875, "distillation_loss": 6.113132953643799, "epoch": 1.67, "learning_rate": 3.805096002898201e-05, "loss": 108.8986, "step": 1979, "task_loss": 3.7056808471679688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.964808314352062, "compression/movement_sparsity/importance_threshold": -0.0001612789796309284, "compression/movement_sparsity/linear_layer_sparsity": 0.8628994381522996, "compression/movement_sparsity/model_sparsity": 0.8332561878625993, "compression_loss": 103.52533721923828, "distillation_loss": 6.656094551086426, "epoch": 1.67, "learning_rate": 3.804492211085618e-05, "loss": 109.5799, "step": 1980, "task_loss": 3.908527135848999 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9650797143264049, "compression/movement_sparsity/importance_threshold": -0.00016003518837376125, "compression/movement_sparsity/linear_layer_sparsity": 0.8631337718946797, "compression/movement_sparsity/model_sparsity": 0.8334824715200335, "compression_loss": 103.55368041992188, "distillation_loss": 5.03590202331543, "epoch": 1.67, "learning_rate": 3.803888419273035e-05, "loss": 109.0151, "step": 1981, "task_loss": 3.170179843902588 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9653497153349135, "compression/movement_sparsity/importance_threshold": -0.0001587978083975013, "compression/movement_sparsity/linear_layer_sparsity": 0.8633448058134991, "compression/movement_sparsity/model_sparsity": 0.8336862557745253, "compression_loss": 103.5819320678711, "distillation_loss": 6.887996673583984, "epoch": 1.67, "learning_rate": 3.8032846274604516e-05, "loss": 109.6422, "step": 1982, "task_loss": 3.0930848121643066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9656183209924866, "compression/movement_sparsity/importance_threshold": -0.00015756682313553157, "compression/movement_sparsity/linear_layer_sparsity": 0.863611728306028, "compression/movement_sparsity/model_sparsity": 0.8339440086582861, "compression_loss": 103.61000061035156, "distillation_loss": 6.123382568359375, "epoch": 1.68, "learning_rate": 3.802680835647869e-05, "loss": 108.6118, "step": 1983, "task_loss": 2.073955774307251 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9658855349140228, "compression/movement_sparsity/importance_threshold": -0.00015634221602123504, "compression/movement_sparsity/linear_layer_sparsity": 0.8639208623519877, "compression/movement_sparsity/model_sparsity": 0.8342425229987596, "compression_loss": 103.637939453125, "distillation_loss": 5.547741413116455, "epoch": 1.68, "learning_rate": 3.802077043835286e-05, "loss": 108.8775, "step": 1984, "task_loss": 3.258735418319702 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9661513607144212, "compression/movement_sparsity/importance_threshold": -0.00015512397048799385, "compression/movement_sparsity/linear_layer_sparsity": 0.864244651199972, "compression/movement_sparsity/model_sparsity": 0.8345551887037246, "compression_loss": 103.66566467285156, "distillation_loss": 5.859804630279541, "epoch": 1.68, "learning_rate": 3.8014732520227024e-05, "loss": 108.8543, "step": 1985, "task_loss": 3.2972655296325684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9664158020085805, "compression/movement_sparsity/importance_threshold": -0.000153912069969191, "compression/movement_sparsity/linear_layer_sparsity": 0.8644349012946021, "compression/movement_sparsity/model_sparsity": 0.8347389031223265, "compression_loss": 103.69327545166016, "distillation_loss": 5.29553747177124, "epoch": 1.68, "learning_rate": 3.80086946021012e-05, "loss": 108.6147, "step": 1986, "task_loss": 3.5131468772888184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9666788624113993, "compression/movement_sparsity/importance_threshold": -0.0001527064978982095, "compression/movement_sparsity/linear_layer_sparsity": 0.8647002974936735, "compression/movement_sparsity/model_sparsity": 0.8349951821455055, "compression_loss": 103.72068786621094, "distillation_loss": 4.401576042175293, "epoch": 1.68, "learning_rate": 3.8002656683975365e-05, "loss": 109.6174, "step": 1987, "task_loss": 3.474736452102661 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9669405455377766, "compression/movement_sparsity/importance_threshold": -0.00015150723770843144, "compression/movement_sparsity/linear_layer_sparsity": 0.8649043319260907, "compression/movement_sparsity/model_sparsity": 0.8351922073674859, "compression_loss": 103.74798583984375, "distillation_loss": 4.527218341827393, "epoch": 1.68, "learning_rate": 3.799661876584953e-05, "loss": 109.2056, "step": 1988, "task_loss": 2.819434404373169 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9672008550026112, "compression/movement_sparsity/importance_threshold": -0.000150314272833239, "compression/movement_sparsity/linear_layer_sparsity": 0.8652662065655039, "compression/movement_sparsity/model_sparsity": 0.8355416504997787, "compression_loss": 103.77509307861328, "distillation_loss": 7.132045269012451, "epoch": 1.68, "learning_rate": 3.7990580847723706e-05, "loss": 109.2047, "step": 1989, "task_loss": 3.5168325901031494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9674597944208018, "compression/movement_sparsity/importance_threshold": -0.000149127586706016, "compression/movement_sparsity/linear_layer_sparsity": 0.8654931592481173, "compression/movement_sparsity/model_sparsity": 0.8357608066595561, "compression_loss": 103.80208587646484, "distillation_loss": 5.200125694274902, "epoch": 1.68, "learning_rate": 3.798454292959788e-05, "loss": 109.0548, "step": 1990, "task_loss": 2.7154173851013184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9677173674072472, "compression/movement_sparsity/importance_threshold": -0.0001479471627601446, "compression/movement_sparsity/linear_layer_sparsity": 0.8656933064018852, "compression/movement_sparsity/model_sparsity": 0.8359540781428675, "compression_loss": 103.82891845703125, "distillation_loss": 5.541769981384277, "epoch": 1.68, "learning_rate": 3.797850501147205e-05, "loss": 108.585, "step": 1991, "task_loss": 3.0667059421539307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9679735775768464, "compression/movement_sparsity/importance_threshold": -0.00014677298442900607, "compression/movement_sparsity/linear_layer_sparsity": 0.8658997733645002, "compression/movement_sparsity/model_sparsity": 0.8361534523301499, "compression_loss": 103.8556137084961, "distillation_loss": 6.19271993637085, "epoch": 1.68, "learning_rate": 3.7972467093346215e-05, "loss": 109.3831, "step": 1992, "task_loss": 2.851672410964966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9682284285444979, "compression/movement_sparsity/importance_threshold": -0.000145605035145986, "compression/movement_sparsity/linear_layer_sparsity": 0.8661657776961211, "compression/movement_sparsity/model_sparsity": 0.8364103185946546, "compression_loss": 103.88213348388672, "distillation_loss": 5.576090335845947, "epoch": 1.68, "learning_rate": 3.796642917522039e-05, "loss": 109.6619, "step": 1993, "task_loss": 2.691758155822754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9684819239251007, "compression/movement_sparsity/importance_threshold": -0.00014444329834446476, "compression/movement_sparsity/linear_layer_sparsity": 0.8664373983106984, "compression/movement_sparsity/model_sparsity": 0.8366726082055185, "compression_loss": 103.90856170654297, "distillation_loss": 6.278306484222412, "epoch": 1.69, "learning_rate": 3.7960391257094556e-05, "loss": 109.832, "step": 1994, "task_loss": 3.111337661743164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9687340673335535, "compression/movement_sparsity/importance_threshold": -0.00014328775745782625, "compression/movement_sparsity/linear_layer_sparsity": 0.8666069480503127, "compression/movement_sparsity/model_sparsity": 0.836836333389981, "compression_loss": 103.93485260009766, "distillation_loss": 4.754920959472656, "epoch": 1.69, "learning_rate": 3.795435333896872e-05, "loss": 108.8991, "step": 1995, "task_loss": 1.9429529905319214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9689848623847551, "compression/movement_sparsity/importance_threshold": -0.0001421383959194526, "compression/movement_sparsity/linear_layer_sparsity": 0.8667492033702086, "compression/movement_sparsity/model_sparsity": 0.8369737018020099, "compression_loss": 103.96097564697266, "distillation_loss": 5.292407035827637, "epoch": 1.69, "learning_rate": 3.79483154208429e-05, "loss": 108.3606, "step": 1996, "task_loss": 3.5803542137145996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9692343126936044, "compression/movement_sparsity/importance_threshold": -0.0001409951971627259, "compression/movement_sparsity/linear_layer_sparsity": 0.8670896383562124, "compression/movement_sparsity/model_sparsity": 0.837302441798944, "compression_loss": 103.98695373535156, "distillation_loss": 5.6439971923828125, "epoch": 1.69, "learning_rate": 3.7942277502717064e-05, "loss": 109.0188, "step": 1997, "task_loss": 2.305114984512329 }, { "compression/movement_sparsity/importance_regularization_factor": 0.969482421875, "compression/movement_sparsity/importance_threshold": -0.00013985814462103008, "compression/movement_sparsity/linear_layer_sparsity": 0.8673184989056476, "compression/movement_sparsity/model_sparsity": 0.8375234402844487, "compression_loss": 104.01280975341797, "distillation_loss": 4.932579517364502, "epoch": 1.69, "learning_rate": 3.793623958459123e-05, "loss": 109.8856, "step": 1998, "task_loss": 3.0482895374298096 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9697291935438409, "compression/movement_sparsity/importance_threshold": -0.00013872722172774635, "compression/movement_sparsity/linear_layer_sparsity": 0.8675224379447237, "compression/movement_sparsity/model_sparsity": 0.8377203733901427, "compression_loss": 104.0384521484375, "distillation_loss": 6.895339488983154, "epoch": 1.69, "learning_rate": 3.7930201666465405e-05, "loss": 109.8212, "step": 1999, "task_loss": 3.641768455505371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9699746313150257, "compression/movement_sparsity/importance_threshold": -0.00013760241191625947, "compression/movement_sparsity/linear_layer_sparsity": 0.8678222711399275, "compression/movement_sparsity/model_sparsity": 0.8380099063926965, "compression_loss": 104.06401062011719, "distillation_loss": 5.027522087097168, "epoch": 1.69, "learning_rate": 3.792416374833957e-05, "loss": 109.8398, "step": 2000, "task_loss": 1.637202501296997 }, { "epoch": 1.69, "eval_accuracy": 0.4498613861386139, "eval_loss": 109.5154037475586, "eval_runtime": 342.4676, "eval_samples_per_second": 73.73, "eval_steps_per_second": 0.578, "step": 2000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9702187388034534, "compression/movement_sparsity/importance_threshold": -0.00013648369861994983, "compression/movement_sparsity/linear_layer_sparsity": 0.8680657149463813, "compression/movement_sparsity/model_sparsity": 0.8382449871554778, "compression_loss": 104.08944702148438, "distillation_loss": 6.077293872833252, "epoch": 1.69, "learning_rate": 3.7918125830213746e-05, "loss": 110.1307, "step": 2001, "task_loss": 2.64743971824646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9704615196240227, "compression/movement_sparsity/importance_threshold": -0.0001353710652722013, "compression/movement_sparsity/linear_layer_sparsity": 0.868254081022525, "compression/movement_sparsity/model_sparsity": 0.8384268822774241, "compression_loss": 104.11473083496094, "distillation_loss": 6.107161521911621, "epoch": 1.69, "learning_rate": 3.7912087912087914e-05, "loss": 109.0641, "step": 2002, "task_loss": 3.218642234802246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9707029773916325, "compression/movement_sparsity/importance_threshold": -0.00013426449530639685, "compression/movement_sparsity/linear_layer_sparsity": 0.8685757115961672, "compression/movement_sparsity/model_sparsity": 0.8387374638514102, "compression_loss": 104.13990020751953, "distillation_loss": 3.8981778621673584, "epoch": 1.69, "learning_rate": 3.790604999396208e-05, "loss": 109.2327, "step": 2003, "task_loss": 2.5974924564361572 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9709431157211814, "compression/movement_sparsity/importance_threshold": -0.00013316397215591778, "compression/movement_sparsity/linear_layer_sparsity": 0.8686890627337137, "compression/movement_sparsity/model_sparsity": 0.8388469210286732, "compression_loss": 104.16494750976562, "distillation_loss": 4.993800163269043, "epoch": 1.69, "learning_rate": 3.7900012075836255e-05, "loss": 108.9419, "step": 2004, "task_loss": 2.9900248050689697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9711819382275683, "compression/movement_sparsity/importance_threshold": -0.00013206947925414794, "compression/movement_sparsity/linear_layer_sparsity": 0.8689208924008901, "compression/movement_sparsity/model_sparsity": 0.8390707866335906, "compression_loss": 104.18980407714844, "distillation_loss": 7.969875812530518, "epoch": 1.69, "learning_rate": 3.789397415771042e-05, "loss": 110.4224, "step": 2005, "task_loss": 4.050882339477539 }, { "compression/movement_sparsity/importance_regularization_factor": 0.971419448525692, "compression/movement_sparsity/importance_threshold": -0.00013098100003447032, "compression/movement_sparsity/linear_layer_sparsity": 0.8692130822046393, "compression/movement_sparsity/model_sparsity": 0.8393529388187001, "compression_loss": 104.21456909179688, "distillation_loss": 4.820255756378174, "epoch": 1.7, "learning_rate": 3.7887936239584596e-05, "loss": 109.4775, "step": 2006, "task_loss": 3.13289737701416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9716556502304512, "compression/movement_sparsity/importance_threshold": -0.0001298985179302662, "compression/movement_sparsity/linear_layer_sparsity": 0.8694103317856717, "compression/movement_sparsity/model_sparsity": 0.8395434122698133, "compression_loss": 104.23922729492188, "distillation_loss": 6.918586254119873, "epoch": 1.7, "learning_rate": 3.788189832145876e-05, "loss": 109.7696, "step": 2007, "task_loss": 2.6579549312591553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9718905469567449, "compression/movement_sparsity/importance_threshold": -0.00012882201637491944, "compression/movement_sparsity/linear_layer_sparsity": 0.8696527978103794, "compression/movement_sparsity/model_sparsity": 0.8397775488406596, "compression_loss": 104.26365661621094, "distillation_loss": 6.124418258666992, "epoch": 1.7, "learning_rate": 3.787586040333293e-05, "loss": 109.4794, "step": 2008, "task_loss": 4.721316337585449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9721241423194718, "compression/movement_sparsity/importance_threshold": -0.00012775147880181217, "compression/movement_sparsity/linear_layer_sparsity": 0.869796567499565, "compression/movement_sparsity/model_sparsity": 0.8399163795987343, "compression_loss": 104.28797149658203, "distillation_loss": 5.2887282371521, "epoch": 1.7, "learning_rate": 3.7869822485207104e-05, "loss": 109.821, "step": 2009, "task_loss": 3.3764915466308594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9723564399335306, "compression/movement_sparsity/importance_threshold": -0.0001266868886443265, "compression/movement_sparsity/linear_layer_sparsity": 0.8699438548182031, "compression/movement_sparsity/model_sparsity": 0.8400586071448685, "compression_loss": 104.31214904785156, "distillation_loss": 4.486602783203125, "epoch": 1.7, "learning_rate": 3.786378456708127e-05, "loss": 109.2602, "step": 2010, "task_loss": 2.6257615089416504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9725874434138202, "compression/movement_sparsity/importance_threshold": -0.00012562822933584634, "compression/movement_sparsity/linear_layer_sparsity": 0.8702148553760635, "compression/movement_sparsity/model_sparsity": 0.8403202979998711, "compression_loss": 104.33613586425781, "distillation_loss": 4.428776741027832, "epoch": 1.7, "learning_rate": 3.7857746648955445e-05, "loss": 109.4463, "step": 2011, "task_loss": 1.7912898063659668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9728171563752394, "compression/movement_sparsity/importance_threshold": -0.0001245754843097538, "compression/movement_sparsity/linear_layer_sparsity": 0.8703796592969587, "compression/movement_sparsity/model_sparsity": 0.8404794403990874, "compression_loss": 104.3600082397461, "distillation_loss": 5.384510517120361, "epoch": 1.7, "learning_rate": 3.785170873082961e-05, "loss": 109.9135, "step": 2012, "task_loss": 2.1067092418670654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9730455824326869, "compression/movement_sparsity/importance_threshold": -0.000123528636999431, "compression/movement_sparsity/linear_layer_sparsity": 0.8706232461934241, "compression/movement_sparsity/model_sparsity": 0.8407146593362982, "compression_loss": 104.38373565673828, "distillation_loss": 5.312527656555176, "epoch": 1.7, "learning_rate": 3.784567081270378e-05, "loss": 108.7, "step": 2013, "task_loss": 2.0374975204467773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9732727252010617, "compression/movement_sparsity/importance_threshold": -0.0001224876708382618, "compression/movement_sparsity/linear_layer_sparsity": 0.8708958922864182, "compression/movement_sparsity/model_sparsity": 0.8409779391972404, "compression_loss": 104.4073486328125, "distillation_loss": 4.819917678833008, "epoch": 1.7, "learning_rate": 3.7839632894577954e-05, "loss": 109.6504, "step": 2014, "task_loss": 2.216794967651367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9734985882952623, "compression/movement_sparsity/importance_threshold": -0.00012145256925962834, "compression/movement_sparsity/linear_layer_sparsity": 0.8709799099715804, "compression/movement_sparsity/model_sparsity": 0.8410590706164488, "compression_loss": 104.4307861328125, "distillation_loss": 6.861874580383301, "epoch": 1.7, "learning_rate": 3.783359497645212e-05, "loss": 109.9998, "step": 2015, "task_loss": 3.6201658248901367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9737231753301877, "compression/movement_sparsity/importance_threshold": -0.0001204233156969119, "compression/movement_sparsity/linear_layer_sparsity": 0.8712942668029646, "compression/movement_sparsity/model_sparsity": 0.8413626283236002, "compression_loss": 104.45419311523438, "distillation_loss": 5.21843147277832, "epoch": 1.7, "learning_rate": 3.782755705832629e-05, "loss": 110.5591, "step": 2016, "task_loss": 4.222533702850342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9739464899207367, "compression/movement_sparsity/importance_threshold": -0.0001193998935834972, "compression/movement_sparsity/linear_layer_sparsity": 0.8714539314076089, "compression/movement_sparsity/model_sparsity": 0.8415168079578891, "compression_loss": 104.47734832763672, "distillation_loss": 6.887425422668457, "epoch": 1.7, "learning_rate": 3.782151914020046e-05, "loss": 110.1044, "step": 2017, "task_loss": 4.146782398223877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9741685356818081, "compression/movement_sparsity/importance_threshold": -0.00011838228635276636, "compression/movement_sparsity/linear_layer_sparsity": 0.8717670123530561, "compression/movement_sparsity/model_sparsity": 0.8418191336097106, "compression_loss": 104.50047302246094, "distillation_loss": 6.678259372711182, "epoch": 1.71, "learning_rate": 3.781548122207463e-05, "loss": 110.4311, "step": 2018, "task_loss": 3.1694118976593018 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9743893162283006, "compression/movement_sparsity/importance_threshold": -0.00011737047743810153, "compression/movement_sparsity/linear_layer_sparsity": 0.871927702436117, "compression/movement_sparsity/model_sparsity": 0.8419743034940779, "compression_loss": 104.5234375, "distillation_loss": 5.637489318847656, "epoch": 1.71, "learning_rate": 3.7809443303948796e-05, "loss": 110.088, "step": 2019, "task_loss": 3.203352451324463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.974608835175113, "compression/movement_sparsity/importance_threshold": -0.0001163644502728857, "compression/movement_sparsity/linear_layer_sparsity": 0.8721008413501897, "compression/movement_sparsity/model_sparsity": 0.8421414945538146, "compression_loss": 104.54627990722656, "distillation_loss": 6.463754177093506, "epoch": 1.71, "learning_rate": 3.780340538582297e-05, "loss": 110.9086, "step": 2020, "task_loss": 3.1289782524108887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9748270961371442, "compression/movement_sparsity/importance_threshold": -0.00011536418829050099, "compression/movement_sparsity/linear_layer_sparsity": 0.8724028447439032, "compression/movement_sparsity/model_sparsity": 0.8424331232018829, "compression_loss": 104.56893920898438, "distillation_loss": 5.853630065917969, "epoch": 1.71, "learning_rate": 3.7797367467697144e-05, "loss": 110.5666, "step": 2021, "task_loss": 3.053022861480713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.975044102729293, "compression/movement_sparsity/importance_threshold": -0.00011436967492433042, "compression/movement_sparsity/linear_layer_sparsity": 0.8727097966671856, "compression/movement_sparsity/model_sparsity": 0.8427295303823059, "compression_loss": 104.591552734375, "distillation_loss": 5.512618541717529, "epoch": 1.71, "learning_rate": 3.779132954957131e-05, "loss": 110.0251, "step": 2022, "task_loss": 3.4492053985595703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.975259858566458, "compression/movement_sparsity/importance_threshold": -0.00011338089360775783, "compression/movement_sparsity/linear_layer_sparsity": 0.8728126903097154, "compression/movement_sparsity/model_sparsity": 0.8428288893116771, "compression_loss": 104.61399841308594, "distillation_loss": 5.689352035522461, "epoch": 1.71, "learning_rate": 3.778529163144548e-05, "loss": 109.1284, "step": 2023, "task_loss": 3.4440691471099854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9754743672635382, "compression/movement_sparsity/importance_threshold": -0.00011239782777416364, "compression/movement_sparsity/linear_layer_sparsity": 0.8729675017781319, "compression/movement_sparsity/model_sparsity": 0.8429783825298975, "compression_loss": 104.63623809814453, "distillation_loss": 7.3119916915893555, "epoch": 1.71, "learning_rate": 3.777925371331965e-05, "loss": 110.1851, "step": 2024, "task_loss": 4.509551048278809 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9756876324354323, "compression/movement_sparsity/importance_threshold": -0.00011142046085693257, "compression/movement_sparsity/linear_layer_sparsity": 0.8730956269593793, "compression/movement_sparsity/model_sparsity": 0.8431021062170099, "compression_loss": 104.65841674804688, "distillation_loss": 5.1806440353393555, "epoch": 1.71, "learning_rate": 3.777321579519382e-05, "loss": 110.0079, "step": 2025, "task_loss": 2.0346271991729736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9758996576970391, "compression/movement_sparsity/importance_threshold": -0.00011044877628944676, "compression/movement_sparsity/linear_layer_sparsity": 0.8733447824421305, "compression/movement_sparsity/model_sparsity": 0.8433427024424368, "compression_loss": 104.68049621582031, "distillation_loss": 5.705599784851074, "epoch": 1.71, "learning_rate": 3.776717787706799e-05, "loss": 109.6138, "step": 2026, "task_loss": 3.5621676445007324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9761104466632575, "compression/movement_sparsity/importance_threshold": -0.00010948275750508746, "compression/movement_sparsity/linear_layer_sparsity": 0.8734759363619574, "compression/movement_sparsity/model_sparsity": 0.843469350821641, "compression_loss": 104.7023696899414, "distillation_loss": 7.041810035705566, "epoch": 1.71, "learning_rate": 3.776113995894216e-05, "loss": 110.7967, "step": 2027, "task_loss": 4.216024875640869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9763200029489862, "compression/movement_sparsity/importance_threshold": -0.00010852238793723854, "compression/movement_sparsity/linear_layer_sparsity": 0.8736979643633372, "compression/movement_sparsity/model_sparsity": 0.8436837514781353, "compression_loss": 104.72413635253906, "distillation_loss": 8.309090614318848, "epoch": 1.71, "learning_rate": 3.775510204081633e-05, "loss": 111.1466, "step": 2028, "task_loss": 3.5577404499053955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9765283301691241, "compression/movement_sparsity/importance_threshold": -0.000107567651019283, "compression/movement_sparsity/linear_layer_sparsity": 0.8739141972192459, "compression/movement_sparsity/model_sparsity": 0.8438925560702335, "compression_loss": 104.74580383300781, "distillation_loss": 6.622119903564453, "epoch": 1.71, "learning_rate": 3.7749064122690495e-05, "loss": 110.3037, "step": 2029, "task_loss": 3.230015277862549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9767354319385697, "compression/movement_sparsity/importance_threshold": -0.00010661853018460385, "compression/movement_sparsity/linear_layer_sparsity": 0.8740823399070793, "compression/movement_sparsity/model_sparsity": 0.8440549225394723, "compression_loss": 104.76725769042969, "distillation_loss": 6.331792831420898, "epoch": 1.72, "learning_rate": 3.774302620456467e-05, "loss": 110.173, "step": 2030, "task_loss": 3.355461835861206 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9769413118722222, "compression/movement_sparsity/importance_threshold": -0.00010567500886658147, "compression/movement_sparsity/linear_layer_sparsity": 0.8743552483317613, "compression/movement_sparsity/model_sparsity": 0.844318455720202, "compression_loss": 104.78858184814453, "distillation_loss": 5.869207859039307, "epoch": 1.72, "learning_rate": 3.773698828643884e-05, "loss": 110.8109, "step": 2031, "task_loss": 2.863466501235962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9771459735849801, "compression/movement_sparsity/importance_threshold": -0.00010473707049860146, "compression/movement_sparsity/linear_layer_sparsity": 0.8746000395691579, "compression/movement_sparsity/model_sparsity": 0.8445548376255282, "compression_loss": 104.80986022949219, "distillation_loss": 5.270971298217773, "epoch": 1.72, "learning_rate": 3.7730950368313004e-05, "loss": 110.6538, "step": 2032, "task_loss": 2.61366605758667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9773494206917425, "compression/movement_sparsity/importance_threshold": -0.00010380469851404336, "compression/movement_sparsity/linear_layer_sparsity": 0.8746730035509217, "compression/movement_sparsity/model_sparsity": 0.8446252950700549, "compression_loss": 104.83094787597656, "distillation_loss": 7.939752578735352, "epoch": 1.72, "learning_rate": 3.772491245018718e-05, "loss": 110.4977, "step": 2033, "task_loss": 3.8145623207092285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9775516568074077, "compression/movement_sparsity/importance_threshold": -0.00010287787634629276, "compression/movement_sparsity/linear_layer_sparsity": 0.8748842401805911, "compression/movement_sparsity/model_sparsity": 0.8448292750716553, "compression_loss": 104.85198211669922, "distillation_loss": 6.415461540222168, "epoch": 1.72, "learning_rate": 3.771887453206135e-05, "loss": 110.7197, "step": 2034, "task_loss": 2.833709239959717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.977752685546875, "compression/movement_sparsity/importance_threshold": -0.00010195658742873093, "compression/movement_sparsity/linear_layer_sparsity": 0.8750067013822114, "compression/movement_sparsity/model_sparsity": 0.8449475293542652, "compression_loss": 104.87284851074219, "distillation_loss": 4.156508445739746, "epoch": 1.72, "learning_rate": 3.771283661393551e-05, "loss": 110.016, "step": 2035, "task_loss": 2.050610065460205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.977952510525043, "compression/movement_sparsity/importance_threshold": -0.00010104081519474086, "compression/movement_sparsity/linear_layer_sparsity": 0.8751836321815923, "compression/movement_sparsity/model_sparsity": 0.8451183820363846, "compression_loss": 104.89370727539062, "distillation_loss": 5.763814926147461, "epoch": 1.72, "learning_rate": 3.7706798695809686e-05, "loss": 110.4756, "step": 2036, "task_loss": 3.26334285736084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9781511353568103, "compression/movement_sparsity/importance_threshold": -0.00010013054307770469, "compression/movement_sparsity/linear_layer_sparsity": 0.875268913828524, "compression/movement_sparsity/model_sparsity": 0.8452007339963872, "compression_loss": 104.91426849365234, "distillation_loss": 6.278050899505615, "epoch": 1.72, "learning_rate": 3.770076077768386e-05, "loss": 111.1227, "step": 2037, "task_loss": 3.8476943969726562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9783485636570761, "compression/movement_sparsity/importance_threshold": -9.922575451100542e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8753992926774545, "compression/movement_sparsity/model_sparsity": 0.8453266339307647, "compression_loss": 104.934814453125, "distillation_loss": 6.661416053771973, "epoch": 1.72, "learning_rate": 3.769472285955802e-05, "loss": 110.7395, "step": 2038, "task_loss": 3.3843183517456055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9785447990407389, "compression/movement_sparsity/importance_threshold": -9.832643292802604e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8756248144599516, "compression/movement_sparsity/model_sparsity": 0.8455444083462468, "compression_loss": 104.95524597167969, "distillation_loss": 5.422012805938721, "epoch": 1.72, "learning_rate": 3.7688684941432194e-05, "loss": 110.3132, "step": 2039, "task_loss": 2.0702829360961914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9787398451226976, "compression/movement_sparsity/importance_threshold": -9.743256176214868e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8758308283041963, "compression/movement_sparsity/model_sparsity": 0.8457433449811691, "compression_loss": 104.97550964355469, "distillation_loss": 5.226916790008545, "epoch": 1.72, "learning_rate": 3.768264702330637e-05, "loss": 111.0157, "step": 2040, "task_loss": 2.806084632873535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.978933705517851, "compression/movement_sparsity/importance_threshold": -9.654412444675634e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8759195441114073, "compression/movement_sparsity/model_sparsity": 0.8458290131274805, "compression_loss": 104.99564361572266, "distillation_loss": 7.217075824737549, "epoch": 1.72, "learning_rate": 3.7676609105180535e-05, "loss": 110.9239, "step": 2041, "task_loss": 3.5021729469299316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9791263838410977, "compression/movement_sparsity/importance_threshold": -9.566110441523203e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8760937800489025, "compression/movement_sparsity/model_sparsity": 0.8459972635245103, "compression_loss": 105.01563262939453, "distillation_loss": 6.3221893310546875, "epoch": 1.73, "learning_rate": 3.76705711870547e-05, "loss": 110.7368, "step": 2042, "task_loss": 3.669285535812378 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9793178837073369, "compression/movement_sparsity/importance_threshold": -9.478348510095786e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8762351887528962, "compression/movement_sparsity/model_sparsity": 0.8461338144044978, "compression_loss": 105.03553009033203, "distillation_loss": 7.536378383636475, "epoch": 1.73, "learning_rate": 3.7664533268928877e-05, "loss": 110.7803, "step": 2043, "task_loss": 3.4504613876342773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.979508208731467, "compression/movement_sparsity/importance_threshold": -9.391124993731598e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8765098858027235, "compression/movement_sparsity/model_sparsity": 0.8463990747655966, "compression_loss": 105.0552749633789, "distillation_loss": 4.6447434425354, "epoch": 1.73, "learning_rate": 3.7658495350803044e-05, "loss": 110.4786, "step": 2044, "task_loss": 1.2674273252487183 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9796973625283871, "compression/movement_sparsity/importance_threshold": -9.304438235769024e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8767712039364635, "compression/movement_sparsity/model_sparsity": 0.8466514158175339, "compression_loss": 105.074951171875, "distillation_loss": 5.0842390060424805, "epoch": 1.73, "learning_rate": 3.765245743267721e-05, "loss": 110.2489, "step": 2045, "task_loss": 2.676974058151245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9798853487129958, "compression/movement_sparsity/importance_threshold": -9.218286579546278e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8770186661874105, "compression/movement_sparsity/model_sparsity": 0.846890376978878, "compression_loss": 105.09443664550781, "distillation_loss": 6.255548000335693, "epoch": 1.73, "learning_rate": 3.7646419514551385e-05, "loss": 110.849, "step": 2046, "task_loss": 2.5290029048919678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9800721709001919, "compression/movement_sparsity/importance_threshold": -9.13266836840166e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8771987449670473, "compression/movement_sparsity/model_sparsity": 0.8470642694984472, "compression_loss": 105.11384582519531, "distillation_loss": 4.824374675750732, "epoch": 1.73, "learning_rate": 3.764038159642556e-05, "loss": 110.4912, "step": 2047, "task_loss": 2.330326557159424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9802578327048743, "compression/movement_sparsity/importance_threshold": -9.047581945673382e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8774756956845579, "compression/movement_sparsity/model_sparsity": 0.8473317061068112, "compression_loss": 105.13311004638672, "distillation_loss": 5.46245002746582, "epoch": 1.73, "learning_rate": 3.763434367829972e-05, "loss": 109.9556, "step": 2048, "task_loss": 2.871610641479492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9804423377419418, "compression/movement_sparsity/importance_threshold": -8.963025654699659e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8776913800287554, "compression/movement_sparsity/model_sparsity": 0.8475399810302628, "compression_loss": 105.15223693847656, "distillation_loss": 5.157700061798096, "epoch": 1.73, "learning_rate": 3.762830576017389e-05, "loss": 109.9021, "step": 2049, "task_loss": 2.563660144805908 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9806256896262932, "compression/movement_sparsity/importance_threshold": -8.878997838818875e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8779092464956302, "compression/movement_sparsity/model_sparsity": 0.847750363113765, "compression_loss": 105.17127990722656, "distillation_loss": 7.161251068115234, "epoch": 1.73, "learning_rate": 3.762226784204807e-05, "loss": 111.3337, "step": 2050, "task_loss": 3.0803229808807373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.980807891972827, "compression/movement_sparsity/importance_threshold": -8.795496841369332e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8780547928857936, "compression/movement_sparsity/model_sparsity": 0.8478909095376731, "compression_loss": 105.19013977050781, "distillation_loss": 5.534578323364258, "epoch": 1.73, "learning_rate": 3.7616229923922234e-05, "loss": 110.9486, "step": 2051, "task_loss": 2.652740955352783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9809889483964425, "compression/movement_sparsity/importance_threshold": -8.712521005689155e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.878150233923551, "compression/movement_sparsity/model_sparsity": 0.8479830718821726, "compression_loss": 105.20893859863281, "distillation_loss": 5.226778030395508, "epoch": 1.73, "learning_rate": 3.76101920057964e-05, "loss": 110.4776, "step": 2052, "task_loss": 1.98257577419281 }, { "compression/movement_sparsity/importance_regularization_factor": 0.981168862512038, "compression/movement_sparsity/importance_threshold": -8.630068675116644e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8784213179505848, "compression/movement_sparsity/model_sparsity": 0.8482448433389257, "compression_loss": 105.22758483886719, "distillation_loss": 6.403439521789551, "epoch": 1.73, "learning_rate": 3.7604154087670576e-05, "loss": 110.8545, "step": 2053, "task_loss": 2.808046579360962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9813476379345127, "compression/movement_sparsity/importance_threshold": -8.548138192990187e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8786778664172703, "compression/movement_sparsity/model_sparsity": 0.8484925785765453, "compression_loss": 105.24609375, "distillation_loss": 4.766055107116699, "epoch": 1.74, "learning_rate": 3.759811616954474e-05, "loss": 110.7477, "step": 2054, "task_loss": 1.9112141132354736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9815252782787652, "compression/movement_sparsity/importance_threshold": -8.466727902647908e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8788202409788425, "compression/movement_sparsity/model_sparsity": 0.848630062133932, "compression_loss": 105.26445770263672, "distillation_loss": 4.664975166320801, "epoch": 1.74, "learning_rate": 3.759207825141891e-05, "loss": 110.0479, "step": 2055, "task_loss": 2.2208364009857178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9817017871596944, "compression/movement_sparsity/importance_threshold": -8.385836147428109e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8790660099979853, "compression/movement_sparsity/model_sparsity": 0.8488673882311933, "compression_loss": 105.2827377319336, "distillation_loss": 5.773608684539795, "epoch": 1.74, "learning_rate": 3.7586040333293084e-05, "loss": 110.6225, "step": 2056, "task_loss": 3.4354066848754883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.981877168192199, "compression/movement_sparsity/importance_threshold": -8.305461270669088e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8792098870046796, "compression/movement_sparsity/model_sparsity": 0.8490063226200902, "compression_loss": 105.3008804321289, "distillation_loss": 4.593318939208984, "epoch": 1.74, "learning_rate": 3.758000241516725e-05, "loss": 109.9458, "step": 2057, "task_loss": 3.1893668174743652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9820514249911777, "compression/movement_sparsity/importance_threshold": -8.225601615709059e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8793049345449051, "compression/movement_sparsity/model_sparsity": 0.8490981049849086, "compression_loss": 105.31886291503906, "distillation_loss": 6.894649505615234, "epoch": 1.74, "learning_rate": 3.757396449704142e-05, "loss": 111.3554, "step": 2058, "task_loss": 3.278856039047241 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9822245611715296, "compression/movement_sparsity/importance_threshold": -8.146255525886322e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8794275269123694, "compression/movement_sparsity/model_sparsity": 0.8492164859274122, "compression_loss": 105.33679962158203, "distillation_loss": 4.260193824768066, "epoch": 1.74, "learning_rate": 3.756792657891559e-05, "loss": 110.2193, "step": 2059, "task_loss": 3.009625196456909 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9823965803481531, "compression/movement_sparsity/importance_threshold": -8.067421344539177e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8796258496684891, "compression/movement_sparsity/model_sparsity": 0.849407995686747, "compression_loss": 105.35462188720703, "distillation_loss": 6.1271820068359375, "epoch": 1.74, "learning_rate": 3.756188866078976e-05, "loss": 111.331, "step": 2060, "task_loss": 2.667715549468994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9825674861359474, "compression/movement_sparsity/importance_threshold": -7.989097415005923e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8798074428174156, "compression/movement_sparsity/model_sparsity": 0.8495833505523621, "compression_loss": 105.37230682373047, "distillation_loss": 5.754546165466309, "epoch": 1.74, "learning_rate": 3.755585074266393e-05, "loss": 110.4174, "step": 2061, "task_loss": 3.5361809730529785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9827372821498109, "compression/movement_sparsity/importance_threshold": -7.911282080624773e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8799968224478084, "compression/movement_sparsity/model_sparsity": 0.849766224409851, "compression_loss": 105.38987731933594, "distillation_loss": 5.472411155700684, "epoch": 1.74, "learning_rate": 3.75498128245381e-05, "loss": 111.0133, "step": 2062, "task_loss": 2.9424664974212646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9829059720046428, "compression/movement_sparsity/importance_threshold": -7.833973684733854e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8802260645706078, "compression/movement_sparsity/model_sparsity": 0.8499875913605011, "compression_loss": 105.40733337402344, "distillation_loss": 5.63942813873291, "epoch": 1.74, "learning_rate": 3.7543774906412274e-05, "loss": 110.9797, "step": 2063, "task_loss": 2.735076665878296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9830735593153417, "compression/movement_sparsity/importance_threshold": -7.757170570671639e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.880402792659139, "compression/movement_sparsity/model_sparsity": 0.8501582482955119, "compression_loss": 105.42459869384766, "distillation_loss": 8.046577453613281, "epoch": 1.74, "learning_rate": 3.753773698828644e-05, "loss": 110.9837, "step": 2064, "task_loss": 3.7293472290039062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9832400476968063, "compression/movement_sparsity/importance_threshold": -7.680871081776341e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8804310290881007, "compression/movement_sparsity/model_sparsity": 0.8501855147162733, "compression_loss": 105.44176483154297, "distillation_loss": 6.163135528564453, "epoch": 1.75, "learning_rate": 3.753169907016061e-05, "loss": 110.6246, "step": 2065, "task_loss": 2.9164552688598633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9834054407639355, "compression/movement_sparsity/importance_threshold": -7.605073561386174e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.880483280790681, "compression/movement_sparsity/model_sparsity": 0.850235971412125, "compression_loss": 105.45874786376953, "distillation_loss": 6.970304489135742, "epoch": 1.75, "learning_rate": 3.752566115203478e-05, "loss": 111.4643, "step": 2066, "task_loss": 2.9453229904174805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9835697421316281, "compression/movement_sparsity/importance_threshold": -7.529776352839523e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8806174992215904, "compression/movement_sparsity/model_sparsity": 0.8503655790270284, "compression_loss": 105.47573852539062, "distillation_loss": 4.62729549407959, "epoch": 1.75, "learning_rate": 3.751962323390895e-05, "loss": 111.3583, "step": 2067, "task_loss": 2.544719696044922 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9837329554147829, "compression/movement_sparsity/importance_threshold": -7.454977799474516e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8807731573059089, "compression/movement_sparsity/model_sparsity": 0.8505158897772903, "compression_loss": 105.49259185791016, "distillation_loss": 6.851388931274414, "epoch": 1.75, "learning_rate": 3.751358531578312e-05, "loss": 111.303, "step": 2068, "task_loss": 3.4843173027038574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9838950842282987, "compression/movement_sparsity/importance_threshold": -7.380676244629451e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8809806497469406, "compression/movement_sparsity/model_sparsity": 0.8507162542146511, "compression_loss": 105.50926208496094, "distillation_loss": 7.789928436279297, "epoch": 1.75, "learning_rate": 3.750754739765729e-05, "loss": 111.6312, "step": 2069, "task_loss": 3.45548677444458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9840561321870743, "compression/movement_sparsity/importance_threshold": -7.30687003164263e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8811208898825059, "compression/movement_sparsity/model_sparsity": 0.8508516766701307, "compression_loss": 105.52584838867188, "distillation_loss": 6.097065448760986, "epoch": 1.75, "learning_rate": 3.750150947953146e-05, "loss": 110.6666, "step": 2070, "task_loss": 2.724325180053711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9842161029060085, "compression/movement_sparsity/importance_threshold": -7.233557503852264e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8812170225219863, "compression/movement_sparsity/model_sparsity": 0.8509445068577063, "compression_loss": 105.54232025146484, "distillation_loss": 6.0153045654296875, "epoch": 1.75, "learning_rate": 3.749547156140563e-05, "loss": 111.0596, "step": 2071, "task_loss": 3.0505194664001465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.984375, "compression/movement_sparsity/importance_threshold": -7.16073700459674e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8814439990529349, "compression/movement_sparsity/model_sparsity": 0.8511636860465555, "compression_loss": 105.55864715576172, "distillation_loss": 6.1038818359375, "epoch": 1.75, "learning_rate": 3.74894336432798e-05, "loss": 111.1549, "step": 2072, "task_loss": 1.678033471107483 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9845328270839477, "compression/movement_sparsity/importance_threshold": -7.088406877214185e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8816102338739464, "compression/movement_sparsity/model_sparsity": 0.8513242101900671, "compression_loss": 105.57491302490234, "distillation_loss": 5.14559268951416, "epoch": 1.75, "learning_rate": 3.748339572515397e-05, "loss": 110.5155, "step": 2073, "task_loss": 2.1572115421295166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9846895877727504, "compression/movement_sparsity/importance_threshold": -7.016565465042986e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8817406842678828, "compression/movement_sparsity/model_sparsity": 0.8514501792116593, "compression_loss": 105.59101104736328, "distillation_loss": 7.057475566864014, "epoch": 1.75, "learning_rate": 3.747735780702814e-05, "loss": 110.9064, "step": 2074, "task_loss": 2.9786875247955322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9848452856813068, "compression/movement_sparsity/importance_threshold": -6.945211111421355e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8819331880301962, "compression/movement_sparsity/model_sparsity": 0.8516360698775265, "compression_loss": 105.6069107055664, "distillation_loss": 5.669126987457275, "epoch": 1.75, "learning_rate": 3.747131988890231e-05, "loss": 111.0062, "step": 2075, "task_loss": 3.0092883110046387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9849999244245158, "compression/movement_sparsity/importance_threshold": -6.874342159687505e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8820135688442295, "compression/movement_sparsity/model_sparsity": 0.8517136893633175, "compression_loss": 105.62281799316406, "distillation_loss": 5.4461669921875, "epoch": 1.75, "learning_rate": 3.7465281970776475e-05, "loss": 111.2058, "step": 2076, "task_loss": 3.030094623565674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9851535076172762, "compression/movement_sparsity/importance_threshold": -6.803956953179823e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.882198309973412, "compression/movement_sparsity/model_sparsity": 0.8518920840663823, "compression_loss": 105.6385498046875, "distillation_loss": 7.324714660644531, "epoch": 1.76, "learning_rate": 3.745924405265065e-05, "loss": 111.2597, "step": 2077, "task_loss": 3.091132402420044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9853060388744866, "compression/movement_sparsity/importance_threshold": -6.734053835236436e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8823501403999194, "compression/movement_sparsity/model_sparsity": 0.8520386986506542, "compression_loss": 105.65415954589844, "distillation_loss": 5.518310546875, "epoch": 1.76, "learning_rate": 3.7453206134524816e-05, "loss": 111.1445, "step": 2078, "task_loss": 2.1889419555664062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9854575218110462, "compression/movement_sparsity/importance_threshold": -6.66463114919573e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8824396312780266, "compression/movement_sparsity/model_sparsity": 0.8521251152417921, "compression_loss": 105.66973876953125, "distillation_loss": 5.066429138183594, "epoch": 1.76, "learning_rate": 3.744716821639899e-05, "loss": 110.8633, "step": 2079, "task_loss": 1.980123519897461 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9856079600418534, "compression/movement_sparsity/importance_threshold": -6.595687238395917e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8826278781124939, "compression/movement_sparsity/model_sparsity": 0.8523068952183805, "compression_loss": 105.68510437011719, "distillation_loss": 4.309197902679443, "epoch": 1.76, "learning_rate": 3.744113029827316e-05, "loss": 111.0678, "step": 2080, "task_loss": 1.5891374349594116 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9857573571818071, "compression/movement_sparsity/importance_threshold": -6.527220446175212e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8827903568207002, "compression/movement_sparsity/model_sparsity": 0.852463792283117, "compression_loss": 105.700439453125, "distillation_loss": 6.537905693054199, "epoch": 1.76, "learning_rate": 3.743509238014733e-05, "loss": 111.3473, "step": 2081, "task_loss": 2.7876319885253906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9859057168458063, "compression/movement_sparsity/importance_threshold": -6.459229115871913e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8829216657547063, "compression/movement_sparsity/model_sparsity": 0.8525905903512865, "compression_loss": 105.7156982421875, "distillation_loss": 6.005692481994629, "epoch": 1.76, "learning_rate": 3.74290544620215e-05, "loss": 110.8163, "step": 2082, "task_loss": 3.296924352645874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9860530426487495, "compression/movement_sparsity/importance_threshold": -6.391711590824495e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8830382244933469, "compression/movement_sparsity/model_sparsity": 0.8527031449386782, "compression_loss": 105.73072814941406, "distillation_loss": 5.337156295776367, "epoch": 1.76, "learning_rate": 3.7423016543895666e-05, "loss": 111.2891, "step": 2083, "task_loss": 3.1113193035125732 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9861993382055357, "compression/movement_sparsity/importance_threshold": -6.324666214370823e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8831784169322417, "compression/movement_sparsity/model_sparsity": 0.8528385213360146, "compression_loss": 105.74569702148438, "distillation_loss": 5.5595293045043945, "epoch": 1.76, "learning_rate": 3.741697862576984e-05, "loss": 111.6988, "step": 2084, "task_loss": 2.326220750808716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9863446071310636, "compression/movement_sparsity/importance_threshold": -6.258091329849544e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8833431254597959, "compression/movement_sparsity/model_sparsity": 0.8529975716189445, "compression_loss": 105.76061248779297, "distillation_loss": 4.541962623596191, "epoch": 1.76, "learning_rate": 3.741094070764401e-05, "loss": 111.0615, "step": 2085, "task_loss": 3.2567625045776367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.986488853040232, "compression/movement_sparsity/importance_threshold": -6.191985280598698e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8834677091632555, "compression/movement_sparsity/model_sparsity": 0.8531178754889259, "compression_loss": 105.775390625, "distillation_loss": 5.726102352142334, "epoch": 1.76, "learning_rate": 3.7404902789518174e-05, "loss": 111.7837, "step": 2086, "task_loss": 3.088817596435547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9866320795479399, "compression/movement_sparsity/importance_threshold": -6.12634640995667e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8835961562970289, "compression/movement_sparsity/model_sparsity": 0.8532419100685047, "compression_loss": 105.79009246826172, "distillation_loss": 6.148242950439453, "epoch": 1.76, "learning_rate": 3.739886487139235e-05, "loss": 111.3363, "step": 2087, "task_loss": 1.8176559209823608 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9867742902690857, "compression/movement_sparsity/importance_threshold": -6.061173061261676e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8836890336387446, "compression/movement_sparsity/model_sparsity": 0.8533315967878083, "compression_loss": 105.80461120605469, "distillation_loss": 4.655555248260498, "epoch": 1.76, "learning_rate": 3.7392826953266515e-05, "loss": 111.2693, "step": 2088, "task_loss": 2.5371999740600586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9869154888185686, "compression/movement_sparsity/importance_threshold": -5.99646357785184e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8837223139906163, "compression/movement_sparsity/model_sparsity": 0.853363733857211, "compression_loss": 105.81907653808594, "distillation_loss": 6.034964561462402, "epoch": 1.77, "learning_rate": 3.738678903514068e-05, "loss": 111.6137, "step": 2089, "task_loss": 3.079648733139038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9870556788112871, "compression/movement_sparsity/importance_threshold": -5.9322163030657224e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.88392646766471, "compression/movement_sparsity/model_sparsity": 0.8535608742245493, "compression_loss": 105.83340454101562, "distillation_loss": 5.619376182556152, "epoch": 1.77, "learning_rate": 3.7380751117014856e-05, "loss": 111.7725, "step": 2090, "task_loss": 3.3019800186157227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9871948638621402, "compression/movement_sparsity/importance_threshold": -5.868429580241363e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8840926905615539, "compression/movement_sparsity/model_sparsity": 0.8537213868535252, "compression_loss": 105.84764862060547, "distillation_loss": 8.00346565246582, "epoch": 1.77, "learning_rate": 3.737471319888903e-05, "loss": 112.0501, "step": 2091, "task_loss": 2.8980355262756348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9873330475860265, "compression/movement_sparsity/importance_threshold": -5.805101752717236e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8842464884757213, "compression/movement_sparsity/model_sparsity": 0.8538699013362031, "compression_loss": 105.86175537109375, "distillation_loss": 4.832254409790039, "epoch": 1.77, "learning_rate": 3.736867528076319e-05, "loss": 111.193, "step": 2092, "task_loss": 2.9543192386627197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9874702335978449, "compression/movement_sparsity/importance_threshold": -5.7422311638313794e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8843511588192289, "compression/movement_sparsity/model_sparsity": 0.8539709759314076, "compression_loss": 105.87570190429688, "distillation_loss": 6.801855564117432, "epoch": 1.77, "learning_rate": 3.7362637362637365e-05, "loss": 110.751, "step": 2093, "task_loss": 3.4634461402893066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9876064255124943, "compression/movement_sparsity/importance_threshold": -5.679816156922094e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8845206727863403, "compression/movement_sparsity/model_sparsity": 0.8541346665722628, "compression_loss": 105.88961791992188, "distillation_loss": 4.672187328338623, "epoch": 1.77, "learning_rate": 3.735659944451154e-05, "loss": 111.1957, "step": 2094, "task_loss": 2.6212620735168457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9877416269448734, "compression/movement_sparsity/importance_threshold": -5.617855075327766e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8846990344858375, "compression/movement_sparsity/model_sparsity": 0.8543069009986776, "compression_loss": 105.90341186523438, "distillation_loss": 5.286535263061523, "epoch": 1.77, "learning_rate": 3.7350561526385706e-05, "loss": 111.5869, "step": 2095, "task_loss": 3.454380512237549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.987875841509881, "compression/movement_sparsity/importance_threshold": -5.556346262386609e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8848982992512004, "compression/movement_sparsity/model_sparsity": 0.8544993204063401, "compression_loss": 105.91706085205078, "distillation_loss": 5.831080436706543, "epoch": 1.77, "learning_rate": 3.734452360825987e-05, "loss": 110.8264, "step": 2096, "task_loss": 2.2341578006744385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9880090728224158, "compression/movement_sparsity/importance_threshold": -5.4952880614369225e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8851031564511845, "compression/movement_sparsity/model_sparsity": 0.8546971401312904, "compression_loss": 105.9306869506836, "distillation_loss": 5.957747459411621, "epoch": 1.77, "learning_rate": 3.733848569013405e-05, "loss": 110.9109, "step": 2097, "task_loss": 3.0612711906433105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9881413244973768, "compression/movement_sparsity/importance_threshold": -5.434678815816919e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8852918802523574, "compression/movement_sparsity/model_sparsity": 0.8548793806893106, "compression_loss": 105.94412994384766, "distillation_loss": 4.588245391845703, "epoch": 1.77, "learning_rate": 3.7332447772008214e-05, "loss": 111.4397, "step": 2098, "task_loss": 1.7837793827056885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9882726001496626, "compression/movement_sparsity/importance_threshold": -5.3745168688648995e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8853811087987765, "compression/movement_sparsity/model_sparsity": 0.854965543960661, "compression_loss": 105.95747375488281, "distillation_loss": 5.237850666046143, "epoch": 1.77, "learning_rate": 3.732640985388238e-05, "loss": 111.2195, "step": 2099, "task_loss": 2.59037446975708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9884029033941721, "compression/movement_sparsity/importance_threshold": -5.3148005639189894e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8854808902335535, "compression/movement_sparsity/model_sparsity": 0.8550618975961898, "compression_loss": 105.97077941894531, "distillation_loss": 4.877206802368164, "epoch": 1.77, "learning_rate": 3.7320371935756555e-05, "loss": 110.9113, "step": 2100, "task_loss": 1.8063442707061768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9885322378458041, "compression/movement_sparsity/importance_threshold": -5.255528244317749e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.88554904877576, "compression/movement_sparsity/model_sparsity": 0.8551277146827914, "compression_loss": 105.98399353027344, "distillation_loss": 6.506133079528809, "epoch": 1.78, "learning_rate": 3.731433401763072e-05, "loss": 111.0838, "step": 2101, "task_loss": 3.491468906402588 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9886606071194574, "compression/movement_sparsity/importance_threshold": -5.196698253399218e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8855830207293546, "compression/movement_sparsity/model_sparsity": 0.8551605195952701, "compression_loss": 105.9970474243164, "distillation_loss": 5.474944591522217, "epoch": 1.78, "learning_rate": 3.730829609950489e-05, "loss": 110.7984, "step": 2102, "task_loss": 3.111976146697998 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9887880148300308, "compression/movement_sparsity/importance_threshold": -5.138308934501696e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.885770385175417, "compression/movement_sparsity/model_sparsity": 0.8553414474962097, "compression_loss": 106.00999450683594, "distillation_loss": 5.6466064453125, "epoch": 1.78, "learning_rate": 3.7302258181379063e-05, "loss": 111.2895, "step": 2103, "task_loss": 2.9231443405151367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9889144645924229, "compression/movement_sparsity/importance_threshold": -5.080358630963483e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8859330785186407, "compression/movement_sparsity/model_sparsity": 0.8554985518225904, "compression_loss": 106.02288055419922, "distillation_loss": 6.63942813873291, "epoch": 1.78, "learning_rate": 3.729622026325324e-05, "loss": 111.4918, "step": 2104, "task_loss": 2.9668469429016113 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9890399600215328, "compression/movement_sparsity/importance_threshold": -5.022845686122879e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8861671141568297, "compression/movement_sparsity/model_sparsity": 0.8557245476166299, "compression_loss": 106.03561401367188, "distillation_loss": 7.036499500274658, "epoch": 1.78, "learning_rate": 3.72901823451274e-05, "loss": 112.4899, "step": 2105, "task_loss": 3.89689302444458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9891645047322591, "compression/movement_sparsity/importance_threshold": -4.9657684433180964e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8862855449897891, "compression/movement_sparsity/model_sparsity": 0.8558389099861412, "compression_loss": 106.04827880859375, "distillation_loss": 3.9980249404907227, "epoch": 1.78, "learning_rate": 3.728414442700157e-05, "loss": 110.5866, "step": 2106, "task_loss": 3.230001211166382 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9892881023395008, "compression/movement_sparsity/importance_threshold": -4.909125245887349e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8864474274896136, "compression/movement_sparsity/model_sparsity": 0.855995231324088, "compression_loss": 106.06082916259766, "distillation_loss": 4.114044189453125, "epoch": 1.78, "learning_rate": 3.7278106508875746e-05, "loss": 110.9632, "step": 2107, "task_loss": 2.399688959121704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9894107564581565, "compression/movement_sparsity/importance_threshold": -4.852914437169023e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8866842414588618, "compression/movement_sparsity/model_sparsity": 0.8562239100049674, "compression_loss": 106.07331848144531, "distillation_loss": 6.446342468261719, "epoch": 1.78, "learning_rate": 3.7272068590749906e-05, "loss": 111.4477, "step": 2108, "task_loss": 3.7379603385925293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.989532470703125, "compression/movement_sparsity/importance_threshold": -4.797134360501332e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8868773772020598, "compression/movement_sparsity/model_sparsity": 0.8564104109412317, "compression_loss": 106.08560943603516, "distillation_loss": 5.716580390930176, "epoch": 1.78, "learning_rate": 3.726603067262408e-05, "loss": 111.3978, "step": 2109, "task_loss": 3.5821282863616943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9896532486893052, "compression/movement_sparsity/importance_threshold": -4.741783359222575e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8869425785506928, "compression/movement_sparsity/model_sparsity": 0.8564733724229562, "compression_loss": 106.0978775024414, "distillation_loss": 5.434313774108887, "epoch": 1.78, "learning_rate": 3.7259992754498254e-05, "loss": 111.3496, "step": 2110, "task_loss": 2.9184410572052 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9897730940315957, "compression/movement_sparsity/importance_threshold": -4.686859776670965e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8871682434232014, "compression/movement_sparsity/model_sparsity": 0.8566912850128678, "compression_loss": 106.10993194580078, "distillation_loss": 6.235025882720947, "epoch": 1.78, "learning_rate": 3.725395483637242e-05, "loss": 111.1849, "step": 2111, "task_loss": 3.016789197921753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9898920103448957, "compression/movement_sparsity/importance_threshold": -4.632361956184716e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8873505758705215, "compression/movement_sparsity/model_sparsity": 0.8568673537797022, "compression_loss": 106.12197875976562, "distillation_loss": 7.228424072265625, "epoch": 1.78, "learning_rate": 3.724791691824659e-05, "loss": 112.0856, "step": 2112, "task_loss": 3.5733635425567627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9900100012441035, "compression/movement_sparsity/importance_threshold": -4.578288241102301e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8874916864703243, "compression/movement_sparsity/model_sparsity": 0.8570036167962948, "compression_loss": 106.13388061523438, "distillation_loss": 6.386613368988037, "epoch": 1.79, "learning_rate": 3.724187900012076e-05, "loss": 111.6303, "step": 2113, "task_loss": 2.577563524246216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9901270703441183, "compression/movement_sparsity/importance_threshold": -4.5246369747617594e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8876803506506589, "compression/movement_sparsity/model_sparsity": 0.857185799781636, "compression_loss": 106.14569091796875, "distillation_loss": 7.202883720397949, "epoch": 1.79, "learning_rate": 3.723584108199493e-05, "loss": 111.5289, "step": 2114, "task_loss": 2.992640972137451 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9902432212598387, "compression/movement_sparsity/importance_threshold": -4.471406500501478e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8878123988830584, "compression/movement_sparsity/model_sparsity": 0.8573133117510248, "compression_loss": 106.15739440917969, "distillation_loss": 5.5941267013549805, "epoch": 1.79, "learning_rate": 3.72298031638691e-05, "loss": 111.9422, "step": 2115, "task_loss": 2.8053081035614014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9903584576061635, "compression/movement_sparsity/importance_threshold": -4.418595161659756e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8879864678822068, "compression/movement_sparsity/model_sparsity": 0.8574814009445535, "compression_loss": 106.16903686523438, "distillation_loss": 6.700503826141357, "epoch": 1.79, "learning_rate": 3.722376524574327e-05, "loss": 112.4869, "step": 2116, "task_loss": 3.0701663494110107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9904727829979916, "compression/movement_sparsity/importance_threshold": -4.36620130157472e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.888187557045218, "compression/movement_sparsity/model_sparsity": 0.8576755820761927, "compression_loss": 106.18060302734375, "distillation_loss": 7.17643928527832, "epoch": 1.79, "learning_rate": 3.721772732761744e-05, "loss": 111.8472, "step": 2117, "task_loss": 2.528869867324829 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9905862010502215, "compression/movement_sparsity/importance_threshold": -4.314223263584757e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8882712408536865, "compression/movement_sparsity/model_sparsity": 0.8577563910883987, "compression_loss": 106.19207763671875, "distillation_loss": 6.318110942840576, "epoch": 1.79, "learning_rate": 3.7211689409491605e-05, "loss": 111.8382, "step": 2118, "task_loss": 3.0412230491638184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9906987153777524, "compression/movement_sparsity/importance_threshold": -4.262659391028167e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8883496422558923, "compression/movement_sparsity/model_sparsity": 0.8578320991612478, "compression_loss": 106.20341491699219, "distillation_loss": 5.150668144226074, "epoch": 1.79, "learning_rate": 3.720565149136578e-05, "loss": 111.9707, "step": 2119, "task_loss": 3.167023181915283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.990810329595483, "compression/movement_sparsity/importance_threshold": -4.211508027243075e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8883780217748656, "compression/movement_sparsity/model_sparsity": 0.8578595037564388, "compression_loss": 106.21470642089844, "distillation_loss": 5.580702781677246, "epoch": 1.79, "learning_rate": 3.719961357323995e-05, "loss": 112.0015, "step": 2120, "task_loss": 2.5224430561065674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9909210473183119, "compression/movement_sparsity/importance_threshold": -4.160767515567782e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8885416809756679, "compression/movement_sparsity/model_sparsity": 0.8580175407602189, "compression_loss": 106.22589874267578, "distillation_loss": 6.30061149597168, "epoch": 1.79, "learning_rate": 3.7193575655114113e-05, "loss": 113.3346, "step": 2121, "task_loss": 3.008774518966675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.991030872161138, "compression/movement_sparsity/importance_threshold": -4.1104361993406745e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8886405680978721, "compression/movement_sparsity/model_sparsity": 0.858113030805563, "compression_loss": 106.23703002929688, "distillation_loss": 4.4872846603393555, "epoch": 1.79, "learning_rate": 3.718753773698829e-05, "loss": 111.6037, "step": 2122, "task_loss": 2.38594913482666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9911398077388601, "compression/movement_sparsity/importance_threshold": -4.0605124218998784e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.888829220354039, "compression/movement_sparsity/model_sparsity": 0.8582952022763685, "compression_loss": 106.24800109863281, "distillation_loss": 6.396997451782227, "epoch": 1.79, "learning_rate": 3.718149981886246e-05, "loss": 111.7976, "step": 2123, "task_loss": 2.88443922996521 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9912478576663771, "compression/movement_sparsity/importance_threshold": -4.0109945265837804e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8888944455510072, "compression/movement_sparsity/model_sparsity": 0.8583581867871646, "compression_loss": 106.25888061523438, "distillation_loss": 4.987330436706543, "epoch": 1.79, "learning_rate": 3.717546190073663e-05, "loss": 111.8058, "step": 2124, "task_loss": 3.5149803161621094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9913550255585877, "compression/movement_sparsity/importance_threshold": -3.961880856730507e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8890153327624996, "compression/movement_sparsity/model_sparsity": 0.8584749211510496, "compression_loss": 106.26969909667969, "distillation_loss": 6.595762252807617, "epoch": 1.8, "learning_rate": 3.7169423982610796e-05, "loss": 111.5897, "step": 2125, "task_loss": 3.695420026779175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9914613150303906, "compression/movement_sparsity/importance_threshold": -3.9131697556784445e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8891489311366918, "compression/movement_sparsity/model_sparsity": 0.8586039300100917, "compression_loss": 106.28042602539062, "distillation_loss": 5.595943927764893, "epoch": 1.8, "learning_rate": 3.716338606448497e-05, "loss": 111.1841, "step": 2126, "task_loss": 3.5223121643066406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9915667296966847, "compression/movement_sparsity/importance_threshold": -3.864859566765893e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8892129877652316, "compression/movement_sparsity/model_sparsity": 0.85866578609638, "compression_loss": 106.29106140136719, "distillation_loss": 4.789682865142822, "epoch": 1.8, "learning_rate": 3.715734814635914e-05, "loss": 111.2505, "step": 2127, "task_loss": 3.266915798187256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9916712731723688, "compression/movement_sparsity/importance_threshold": -3.8169486333309785e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8893331118299953, "compression/movement_sparsity/model_sparsity": 0.8587817835299741, "compression_loss": 106.30154418945312, "distillation_loss": 5.2319488525390625, "epoch": 1.8, "learning_rate": 3.7151310228233304e-05, "loss": 111.8515, "step": 2128, "task_loss": 2.576676845550537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9917749490723418, "compression/movement_sparsity/importance_threshold": -3.769435298712001e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.889473602373081, "compression/movement_sparsity/model_sparsity": 0.8589174477907054, "compression_loss": 106.3120346069336, "distillation_loss": 6.967667102813721, "epoch": 1.8, "learning_rate": 3.714527231010748e-05, "loss": 112.5429, "step": 2129, "task_loss": 2.587092876434326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9918777610115023, "compression/movement_sparsity/importance_threshold": -3.7223179062472606e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8895484622974991, "compression/movement_sparsity/model_sparsity": 0.8589897360464236, "compression_loss": 106.32234191894531, "distillation_loss": 4.984579086303711, "epoch": 1.8, "learning_rate": 3.7139234391981645e-05, "loss": 111.8473, "step": 2130, "task_loss": 2.265969753265381 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9919797126047492, "compression/movement_sparsity/importance_threshold": -3.6755947992751434e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8896007140000793, "compression/movement_sparsity/model_sparsity": 0.8590401927422753, "compression_loss": 106.33267974853516, "distillation_loss": 4.177818775177002, "epoch": 1.8, "learning_rate": 3.713319647385581e-05, "loss": 111.3894, "step": 2131, "task_loss": 2.4165358543395996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9920808074669812, "compression/movement_sparsity/importance_threshold": -3.629264321133689e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8897094981814213, "compression/movement_sparsity/model_sparsity": 0.8591452398523288, "compression_loss": 106.34286499023438, "distillation_loss": 6.125123977661133, "epoch": 1.8, "learning_rate": 3.7127158555729986e-05, "loss": 111.3477, "step": 2132, "task_loss": 3.365981340408325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9921810492130972, "compression/movement_sparsity/importance_threshold": -3.5833248151612845e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8898398531820166, "compression/movement_sparsity/model_sparsity": 0.8592711167576348, "compression_loss": 106.35289001464844, "distillation_loss": 5.453841209411621, "epoch": 1.8, "learning_rate": 3.7121120637604153e-05, "loss": 111.2296, "step": 2133, "task_loss": 2.4984819889068604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9922804414579961, "compression/movement_sparsity/importance_threshold": -3.537774624696229e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8898506564778947, "compression/movement_sparsity/model_sparsity": 0.8592815489270647, "compression_loss": 106.36284637451172, "distillation_loss": 6.11942195892334, "epoch": 1.8, "learning_rate": 3.711508271947833e-05, "loss": 111.4087, "step": 2134, "task_loss": 2.3449668884277344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9923789878165764, "compression/movement_sparsity/importance_threshold": -3.4926120930767364e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.889905924994887, "compression/movement_sparsity/model_sparsity": 0.8593349188004723, "compression_loss": 106.37279510498047, "distillation_loss": 5.889513969421387, "epoch": 1.8, "learning_rate": 3.7109044801352495e-05, "loss": 111.761, "step": 2135, "task_loss": 3.039088487625122 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9924766919037372, "compression/movement_sparsity/importance_threshold": -3.447835563641019e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.890004847889594, "compression/movement_sparsity/model_sparsity": 0.8594304433894239, "compression_loss": 106.38259887695312, "distillation_loss": 6.136451244354248, "epoch": 1.81, "learning_rate": 3.710300688322667e-05, "loss": 112.3329, "step": 2136, "task_loss": 2.967050790786743 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9925735573343771, "compression/movement_sparsity/importance_threshold": -3.403443379727464e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8900350995028862, "compression/movement_sparsity/model_sparsity": 0.8594596557667346, "compression_loss": 106.39230346679688, "distillation_loss": 4.895430564880371, "epoch": 1.81, "learning_rate": 3.7096968965100836e-05, "loss": 111.5927, "step": 2137, "task_loss": 3.0056662559509277 }, { "compression/movement_sparsity/importance_regularization_factor": 0.992669587723395, "compression/movement_sparsity/importance_threshold": -3.359433884674284e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.890152337919082, "compression/movement_sparsity/model_sparsity": 0.8595728666826665, "compression_loss": 106.40189361572266, "distillation_loss": 4.497530937194824, "epoch": 1.81, "learning_rate": 3.7090931046975e-05, "loss": 111.5279, "step": 2138, "task_loss": 2.737508535385132 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9927647866856896, "compression/movement_sparsity/importance_threshold": -3.3158054218197786e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8903401793318497, "compression/movement_sparsity/model_sparsity": 0.8597542551650379, "compression_loss": 106.4114761352539, "distillation_loss": 5.689548969268799, "epoch": 1.81, "learning_rate": 3.708489312884918e-05, "loss": 111.861, "step": 2139, "task_loss": 2.4715375900268555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9928591578361597, "compression/movement_sparsity/importance_threshold": -3.272556334502075e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8904314826834375, "compression/movement_sparsity/model_sparsity": 0.8598424219656167, "compression_loss": 106.42094421386719, "distillation_loss": 5.127225875854492, "epoch": 1.81, "learning_rate": 3.7078855210723344e-05, "loss": 112.0939, "step": 2140, "task_loss": 2.4176976680755615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9929527047897043, "compression/movement_sparsity/importance_threshold": -3.2296849660596455e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.890454818279501, "compression/movement_sparsity/model_sparsity": 0.8598649559121666, "compression_loss": 106.43030548095703, "distillation_loss": 5.254761695861816, "epoch": 1.81, "learning_rate": 3.707281729259751e-05, "loss": 112.0035, "step": 2141, "task_loss": 2.1653659343719482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9930454311612219, "compression/movement_sparsity/importance_threshold": -3.187189659830617e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8906428266306157, "compression/movement_sparsity/model_sparsity": 0.8600465055980392, "compression_loss": 106.43963623046875, "distillation_loss": 5.79201602935791, "epoch": 1.81, "learning_rate": 3.7066779374471685e-05, "loss": 111.4909, "step": 2142, "task_loss": 3.4812557697296143 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9931373405656115, "compression/movement_sparsity/importance_threshold": -3.14506875915329e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8906081749994658, "compression/movement_sparsity/model_sparsity": 0.8600130443570202, "compression_loss": 106.44884490966797, "distillation_loss": 5.219759941101074, "epoch": 1.81, "learning_rate": 3.706074145634585e-05, "loss": 112.4693, "step": 2143, "task_loss": 2.679417610168457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9932284366177719, "compression/movement_sparsity/importance_threshold": -3.103320607365963e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8906297935153896, "compression/movement_sparsity/model_sparsity": 0.8600339202104157, "compression_loss": 106.45801544189453, "distillation_loss": 5.55091667175293, "epoch": 1.81, "learning_rate": 3.7054703538220026e-05, "loss": 111.3742, "step": 2144, "task_loss": 2.9986178874969482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9933187229326018, "compression/movement_sparsity/importance_threshold": -3.0619435478068495e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8907024951654655, "compression/movement_sparsity/model_sparsity": 0.860104124335155, "compression_loss": 106.46708679199219, "distillation_loss": 5.180177211761475, "epoch": 1.81, "learning_rate": 3.7048665620094194e-05, "loss": 111.8019, "step": 2145, "task_loss": 2.932173728942871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.993408203125, "compression/movement_sparsity/importance_threshold": -3.0209359238142497e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8908325401377022, "compression/movement_sparsity/model_sparsity": 0.8602297018625303, "compression_loss": 106.47607421875, "distillation_loss": 4.569960594177246, "epoch": 1.81, "learning_rate": 3.704262770196836e-05, "loss": 111.6183, "step": 2146, "task_loss": 2.06699275970459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9934968808098653, "compression/movement_sparsity/importance_threshold": -2.9802960787264632e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8908512849292257, "compression/movement_sparsity/model_sparsity": 0.8602478027127993, "compression_loss": 106.48492431640625, "distillation_loss": 4.466646671295166, "epoch": 1.81, "learning_rate": 3.7036589783842535e-05, "loss": 111.4532, "step": 2147, "task_loss": 2.0774192810058594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9935847596020966, "compression/movement_sparsity/importance_threshold": -2.940022355881703e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8909656973176919, "compression/movement_sparsity/model_sparsity": 0.8603582846837479, "compression_loss": 106.4937744140625, "distillation_loss": 5.219243049621582, "epoch": 1.82, "learning_rate": 3.70305518657167e-05, "loss": 111.719, "step": 2148, "task_loss": 2.7936244010925293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9936718431165926, "compression/movement_sparsity/importance_threshold": -2.9001130986181824e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8910482006335644, "compression/movement_sparsity/model_sparsity": 0.8604379537569103, "compression_loss": 106.50247192382812, "distillation_loss": 6.083998680114746, "epoch": 1.82, "learning_rate": 3.702451394759087e-05, "loss": 111.9779, "step": 2149, "task_loss": 3.169741630554199 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9937581349682522, "compression/movement_sparsity/importance_threshold": -2.8605666502742877e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8911734878629144, "compression/movement_sparsity/model_sparsity": 0.8605589369845036, "compression_loss": 106.5111312866211, "distillation_loss": 4.681313991546631, "epoch": 1.82, "learning_rate": 3.701847602946504e-05, "loss": 111.4976, "step": 2150, "task_loss": 1.8150134086608887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.993843638771974, "compression/movement_sparsity/importance_threshold": -2.8213813541881454e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8912975230546627, "compression/movement_sparsity/model_sparsity": 0.8606787111858384, "compression_loss": 106.5196762084961, "distillation_loss": 5.965396881103516, "epoch": 1.82, "learning_rate": 3.701243811133921e-05, "loss": 112.388, "step": 2151, "task_loss": 3.8050293922424316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9939283581426569, "compression/movement_sparsity/importance_threshold": -2.7825555536982287e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.891248979768217, "compression/movement_sparsity/model_sparsity": 0.8606318355106188, "compression_loss": 106.5280990600586, "distillation_loss": 4.554994106292725, "epoch": 1.82, "learning_rate": 3.7006400193213384e-05, "loss": 111.2714, "step": 2152, "task_loss": 2.791567087173462 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9940122966951997, "compression/movement_sparsity/importance_threshold": -2.744087592142664e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8913859884543531, "compression/movement_sparsity/model_sparsity": 0.8607641375268981, "compression_loss": 106.53646850585938, "distillation_loss": 5.1288042068481445, "epoch": 1.82, "learning_rate": 3.700036227508755e-05, "loss": 112.0944, "step": 2153, "task_loss": 2.7086265087127686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9940954580445013, "compression/movement_sparsity/importance_threshold": -2.705975812859751e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8914552082474794, "compression/movement_sparsity/model_sparsity": 0.8608309794071854, "compression_loss": 106.54471588134766, "distillation_loss": 5.081895351409912, "epoch": 1.82, "learning_rate": 3.6994324356961725e-05, "loss": 111.5697, "step": 2154, "task_loss": 2.480844497680664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9941778458054604, "compression/movement_sparsity/importance_threshold": -2.668218559187703e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8915814732585755, "compression/movement_sparsity/model_sparsity": 0.8609529068267138, "compression_loss": 106.55292510986328, "distillation_loss": 5.3984270095825195, "epoch": 1.82, "learning_rate": 3.698828643883589e-05, "loss": 112.3222, "step": 2155, "task_loss": 2.4995384216308594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9942594635929758, "compression/movement_sparsity/importance_threshold": -2.6308141744648197e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8916826021242953, "compression/movement_sparsity/model_sparsity": 0.8610505616047873, "compression_loss": 106.56098175048828, "distillation_loss": 4.84072208404541, "epoch": 1.82, "learning_rate": 3.698224852071006e-05, "loss": 111.2445, "step": 2156, "task_loss": 2.8397762775421143 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9943403150219462, "compression/movement_sparsity/importance_threshold": -2.5937610020294877e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8917956790059861, "compression/movement_sparsity/model_sparsity": 0.8611597539477269, "compression_loss": 106.56896209716797, "distillation_loss": 3.9884490966796875, "epoch": 1.82, "learning_rate": 3.6976210602584234e-05, "loss": 111.2259, "step": 2157, "task_loss": 1.9812816381454468 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9944204037072706, "compression/movement_sparsity/importance_threshold": -2.5570573852197465e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.891965145276427, "compression/movement_sparsity/model_sparsity": 0.8613233985304389, "compression_loss": 106.57687377929688, "distillation_loss": 4.716835975646973, "epoch": 1.82, "learning_rate": 3.69701726844584e-05, "loss": 111.6488, "step": 2158, "task_loss": 3.0895802974700928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9944997332638476, "compression/movement_sparsity/importance_threshold": -2.5207016673740694e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8921607850948287, "compression/movement_sparsity/model_sparsity": 0.86151231751922, "compression_loss": 106.58467102050781, "distillation_loss": 4.836824417114258, "epoch": 1.82, "learning_rate": 3.696413476633257e-05, "loss": 111.8544, "step": 2159, "task_loss": 2.6262006759643555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9945783073065761, "compression/movement_sparsity/importance_threshold": -2.484692191830496e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8921944589442323, "compression/movement_sparsity/model_sparsity": 0.8615448345683038, "compression_loss": 106.59242248535156, "distillation_loss": 5.417158126831055, "epoch": 1.83, "learning_rate": 3.695809684820674e-05, "loss": 111.97, "step": 2160, "task_loss": 3.105301856994629 }, { "compression/movement_sparsity/importance_regularization_factor": 0.994656129450355, "compression/movement_sparsity/importance_threshold": -2.4490273019274998e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8922798956053433, "compression/movement_sparsity/model_sparsity": 0.8616273362172716, "compression_loss": 106.60005950927734, "distillation_loss": 3.837585687637329, "epoch": 1.83, "learning_rate": 3.695205893008091e-05, "loss": 111.7199, "step": 2161, "task_loss": 1.6134960651397705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9947332033100829, "compression/movement_sparsity/importance_threshold": -2.4137053410033804e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8924000912151128, "compression/movement_sparsity/model_sparsity": 0.8617434027380806, "compression_loss": 106.60758972167969, "distillation_loss": 6.722909927368164, "epoch": 1.83, "learning_rate": 3.6946021011955076e-05, "loss": 112.3944, "step": 2162, "task_loss": 3.1917309761047363 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9948095325006586, "compression/movement_sparsity/importance_threshold": -2.3787246523961775e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8924750346087041, "compression/movement_sparsity/model_sparsity": 0.8618157715955493, "compression_loss": 106.6150894165039, "distillation_loss": 5.746826648712158, "epoch": 1.83, "learning_rate": 3.693998309382925e-05, "loss": 112.4164, "step": 2163, "task_loss": 2.725614309310913 }, { "compression/movement_sparsity/importance_regularization_factor": 0.994885120636981, "compression/movement_sparsity/importance_threshold": -2.3440835794443643e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8925371833704223, "compression/movement_sparsity/model_sparsity": 0.8618757853561104, "compression_loss": 106.62248992919922, "distillation_loss": 4.631133079528809, "epoch": 1.83, "learning_rate": 3.6933945175703424e-05, "loss": 111.8534, "step": 2164, "task_loss": 1.5861016511917114 }, { "compression/movement_sparsity/importance_regularization_factor": 0.994959971333949, "compression/movement_sparsity/importance_threshold": -2.3097804654860672e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8926509995505065, "compression/movement_sparsity/model_sparsity": 0.8619856916002694, "compression_loss": 106.62984466552734, "distillation_loss": 6.951296806335449, "epoch": 1.83, "learning_rate": 3.6927907257577585e-05, "loss": 112.3023, "step": 2165, "task_loss": 3.138481616973877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9950340882064611, "compression/movement_sparsity/importance_threshold": -2.2758136538596728e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8927483246067505, "compression/movement_sparsity/model_sparsity": 0.8620796732414244, "compression_loss": 106.63709259033203, "distillation_loss": 3.292125701904297, "epoch": 1.83, "learning_rate": 3.692186933945176e-05, "loss": 112.2932, "step": 2166, "task_loss": 1.9664015769958496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9951074748694163, "compression/movement_sparsity/importance_threshold": -2.242181487903394e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8929161930387282, "compression/movement_sparsity/model_sparsity": 0.8622417748763399, "compression_loss": 106.64432525634766, "distillation_loss": 4.762248992919922, "epoch": 1.83, "learning_rate": 3.691583142132593e-05, "loss": 111.907, "step": 2167, "task_loss": 2.4153761863708496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9951801349377135, "compression/movement_sparsity/importance_threshold": -2.2088823109553575e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8930432927415588, "compression/movement_sparsity/model_sparsity": 0.862364508313374, "compression_loss": 106.65140533447266, "distillation_loss": 6.531582832336426, "epoch": 1.83, "learning_rate": 3.690979350320009e-05, "loss": 112.9831, "step": 2168, "task_loss": 3.3460395336151123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9952520720262512, "compression/movement_sparsity/importance_threshold": -2.175914466354123e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8931171391117277, "compression/movement_sparsity/model_sparsity": 0.8624358178335495, "compression_loss": 106.65839385986328, "distillation_loss": 5.363083839416504, "epoch": 1.83, "learning_rate": 3.690375558507427e-05, "loss": 111.4688, "step": 2169, "task_loss": 3.035529851913452 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9953232897499285, "compression/movement_sparsity/importance_threshold": -2.1432762974377302e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8932597640808203, "compression/movement_sparsity/model_sparsity": 0.8625735431961881, "compression_loss": 106.66531372070312, "distillation_loss": 4.697942733764648, "epoch": 1.83, "learning_rate": 3.689771766694844e-05, "loss": 111.8335, "step": 2170, "task_loss": 2.3191537857055664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.995393791723644, "compression/movement_sparsity/importance_threshold": -2.110966147544479e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8933562305969944, "compression/movement_sparsity/model_sparsity": 0.8626666957907658, "compression_loss": 106.67213439941406, "distillation_loss": 3.894468307495117, "epoch": 1.83, "learning_rate": 3.689167974882261e-05, "loss": 111.7491, "step": 2171, "task_loss": 1.8348608016967773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9954635815622964, "compression/movement_sparsity/importance_threshold": -2.0789823600127558e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.893401137012311, "compression/movement_sparsity/model_sparsity": 0.8627100595325681, "compression_loss": 106.67890930175781, "distillation_loss": 5.148042678833008, "epoch": 1.84, "learning_rate": 3.6885641830696775e-05, "loss": 111.5101, "step": 2172, "task_loss": 2.1044039726257324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9955326628807848, "compression/movement_sparsity/importance_threshold": -2.047323278180687e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8934679481235748, "compression/movement_sparsity/model_sparsity": 0.862774575476625, "compression_loss": 106.6855239868164, "distillation_loss": 4.718634128570557, "epoch": 1.84, "learning_rate": 3.687960391257095e-05, "loss": 111.6168, "step": 2173, "task_loss": 2.5702147483825684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9956010392940078, "compression/movement_sparsity/importance_threshold": -2.0159872453865725e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.893589038045917, "compression/movement_sparsity/model_sparsity": 0.8628915055876185, "compression_loss": 106.69208526611328, "distillation_loss": 4.731832504272461, "epoch": 1.84, "learning_rate": 3.6873565994445116e-05, "loss": 112.6783, "step": 2174, "task_loss": 3.032580614089966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9956687144168642, "compression/movement_sparsity/importance_threshold": -1.984972604968712e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.893783127722526, "compression/movement_sparsity/model_sparsity": 0.8630789276867462, "compression_loss": 106.69865417480469, "distillation_loss": 6.03322696685791, "epoch": 1.84, "learning_rate": 3.6867528076319284e-05, "loss": 112.3084, "step": 2175, "task_loss": 3.243356943130493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9957356918642528, "compression/movement_sparsity/importance_threshold": -1.9542777002654922e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8939040626306889, "compression/movement_sparsity/model_sparsity": 0.8631957081087744, "compression_loss": 106.70512390136719, "distillation_loss": 5.659082889556885, "epoch": 1.84, "learning_rate": 3.686149015819346e-05, "loss": 111.9704, "step": 2176, "task_loss": 4.060610771179199 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9958019752510725, "compression/movement_sparsity/importance_threshold": -1.9239008746149526e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8939389169726885, "compression/movement_sparsity/model_sparsity": 0.8632293650969018, "compression_loss": 106.71151733398438, "distillation_loss": 6.490670204162598, "epoch": 1.84, "learning_rate": 3.685545224006763e-05, "loss": 112.0281, "step": 2177, "task_loss": 3.2682507038116455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9958675681922221, "compression/movement_sparsity/importance_threshold": -1.893840471355393e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8941024927043173, "compression/movement_sparsity/model_sparsity": 0.8633873214989314, "compression_loss": 106.71782684326172, "distillation_loss": 5.476766586303711, "epoch": 1.84, "learning_rate": 3.684941432194179e-05, "loss": 113.1167, "step": 2178, "task_loss": 3.3526976108551025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9959324743026002, "compression/movement_sparsity/importance_threshold": -1.8640948338252e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8941174217621973, "compression/movement_sparsity/model_sparsity": 0.8634017376977462, "compression_loss": 106.72406768798828, "distillation_loss": 6.122330665588379, "epoch": 1.84, "learning_rate": 3.6843376403815966e-05, "loss": 111.8128, "step": 2179, "task_loss": 2.326443910598755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9959966971971058, "compression/movement_sparsity/importance_threshold": -1.8346623053625867e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8941601937515072, "compression/movement_sparsity/model_sparsity": 0.8634430403376412, "compression_loss": 106.73019409179688, "distillation_loss": 4.431092262268066, "epoch": 1.84, "learning_rate": 3.683733848569014e-05, "loss": 111.3946, "step": 2180, "task_loss": 3.100778579711914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9960602404906376, "compression/movement_sparsity/importance_threshold": -1.805541229305853e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8942472640235842, "compression/movement_sparsity/model_sparsity": 0.863527119478013, "compression_loss": 106.73634338378906, "distillation_loss": 4.738229274749756, "epoch": 1.84, "learning_rate": 3.68313005675643e-05, "loss": 111.9939, "step": 2181, "task_loss": 3.4247994422912598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9961231077980944, "compression/movement_sparsity/importance_threshold": -1.776729948993125e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8942088324312939, "compression/movement_sparsity/model_sparsity": 0.8634900081291471, "compression_loss": 106.74227905273438, "distillation_loss": 6.87554931640625, "epoch": 1.84, "learning_rate": 3.6825262649438474e-05, "loss": 112.7011, "step": 2182, "task_loss": 3.2774767875671387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.996185302734375, "compression/movement_sparsity/importance_threshold": -1.748226807762876e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8942830007539889, "compression/movement_sparsity/model_sparsity": 0.8635616285417892, "compression_loss": 106.74824523925781, "distillation_loss": 5.138031959533691, "epoch": 1.84, "learning_rate": 3.681922473131265e-05, "loss": 111.6985, "step": 2183, "task_loss": 3.256697416305542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9962468289143782, "compression/movement_sparsity/importance_threshold": -1.7200301489532326e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8942881042977371, "compression/movement_sparsity/model_sparsity": 0.8635665567631092, "compression_loss": 106.75403594970703, "distillation_loss": 6.378077983856201, "epoch": 1.85, "learning_rate": 3.6813186813186815e-05, "loss": 112.0877, "step": 2184, "task_loss": 2.470500946044922 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9963076899530029, "compression/movement_sparsity/importance_threshold": -1.6921383159024944e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8943923572953775, "compression/movement_sparsity/model_sparsity": 0.8636672283495609, "compression_loss": 106.7599105834961, "distillation_loss": 6.3969197273254395, "epoch": 1.85, "learning_rate": 3.680714889506098e-05, "loss": 112.0532, "step": 2185, "task_loss": 2.86419677734375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9963678894651476, "compression/movement_sparsity/importance_threshold": -1.6645496519489614e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8944195801700902, "compression/movement_sparsity/model_sparsity": 0.8636935160347798, "compression_loss": 106.765625, "distillation_loss": 5.767027854919434, "epoch": 1.85, "learning_rate": 3.6801110976935156e-05, "loss": 112.263, "step": 2186, "task_loss": 2.578387498855591 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9964274310657115, "compression/movement_sparsity/importance_threshold": -1.6372625004308465e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8944407813401467, "compression/movement_sparsity/model_sparsity": 0.8637139888794225, "compression_loss": 106.77127838134766, "distillation_loss": 5.773892402648926, "epoch": 1.85, "learning_rate": 3.6795073058809324e-05, "loss": 112.6165, "step": 2187, "task_loss": 1.9805519580841064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9964863183695931, "compression/movement_sparsity/importance_threshold": -1.6102752046864496e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.894514258061119, "compression/movement_sparsity/model_sparsity": 0.8637849414489885, "compression_loss": 106.77689361572266, "distillation_loss": 7.080057144165039, "epoch": 1.85, "learning_rate": 3.678903514068349e-05, "loss": 112.2074, "step": 2188, "task_loss": 2.882878303527832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9965445549916914, "compression/movement_sparsity/importance_threshold": -1.5835861080540706e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8946780484277651, "compression/movement_sparsity/model_sparsity": 0.8639431051126623, "compression_loss": 106.78244018554688, "distillation_loss": 8.043359756469727, "epoch": 1.85, "learning_rate": 3.6782997222557665e-05, "loss": 112.4699, "step": 2189, "task_loss": 3.0302743911743164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9966021445469049, "compression/movement_sparsity/importance_threshold": -1.5571935538719224e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8947994960751364, "compression/movement_sparsity/model_sparsity": 0.8640603806597297, "compression_loss": 106.78783416748047, "distillation_loss": 5.556333541870117, "epoch": 1.85, "learning_rate": 3.677695930443183e-05, "loss": 111.8372, "step": 2190, "task_loss": 3.200687885284424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9966590906501327, "compression/movement_sparsity/importance_threshold": -1.531095885478305e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8948639581253759, "compression/movement_sparsity/model_sparsity": 0.864122628240235, "compression_loss": 106.79325866699219, "distillation_loss": 5.674887180328369, "epoch": 1.85, "learning_rate": 3.6770921386306e-05, "loss": 112.0338, "step": 2191, "task_loss": 2.6864166259765625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9967153969162734, "compression/movement_sparsity/importance_threshold": -1.5052914462114313e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8949847499435272, "compression/movement_sparsity/model_sparsity": 0.8642392704878337, "compression_loss": 106.79859924316406, "distillation_loss": 6.5648088455200195, "epoch": 1.85, "learning_rate": 3.676488346818017e-05, "loss": 112.2531, "step": 2192, "task_loss": 2.338777780532837 }, { "compression/movement_sparsity/importance_regularization_factor": 0.996771066960226, "compression/movement_sparsity/importance_threshold": -1.4797785794096012e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8950568673093888, "compression/movement_sparsity/model_sparsity": 0.8643089104003191, "compression_loss": 106.80387115478516, "distillation_loss": 5.335129737854004, "epoch": 1.85, "learning_rate": 3.675884555005435e-05, "loss": 112.9511, "step": 2193, "task_loss": 2.2812108993530273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9968261043968891, "compression/movement_sparsity/importance_threshold": -1.4545556284110278e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8950659058284568, "compression/movement_sparsity/model_sparsity": 0.8643176384184513, "compression_loss": 106.80903625488281, "distillation_loss": 6.024045467376709, "epoch": 1.85, "learning_rate": 3.6752807631928514e-05, "loss": 111.7874, "step": 2194, "task_loss": 2.435070037841797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9968805128411616, "compression/movement_sparsity/importance_threshold": -1.4296209365540977e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8951542162139681, "compression/movement_sparsity/model_sparsity": 0.8644029150705457, "compression_loss": 106.8142318725586, "distillation_loss": 4.458570957183838, "epoch": 1.85, "learning_rate": 3.674676971380268e-05, "loss": 111.5307, "step": 2195, "task_loss": 2.8025014400482178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9969342959079422, "compression/movement_sparsity/importance_threshold": -1.4049728471770238e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8952077080299825, "compression/movement_sparsity/model_sparsity": 0.86445456927812, "compression_loss": 106.81937408447266, "distillation_loss": 3.9537572860717773, "epoch": 1.86, "learning_rate": 3.6740731795676855e-05, "loss": 112.0857, "step": 2196, "task_loss": 2.1714389324188232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9969874572121299, "compression/movement_sparsity/importance_threshold": -1.3806097036181061e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8953651905119494, "compression/movement_sparsity/model_sparsity": 0.8646066417523585, "compression_loss": 106.82439422607422, "distillation_loss": 5.759533882141113, "epoch": 1.86, "learning_rate": 3.673469387755102e-05, "loss": 112.3827, "step": 2197, "task_loss": 3.0706708431243896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9970400003686233, "compression/movement_sparsity/importance_threshold": -1.356529849215471e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8953773412387703, "compression/movement_sparsity/model_sparsity": 0.8646183750643331, "compression_loss": 106.82939910888672, "distillation_loss": 4.429379463195801, "epoch": 1.86, "learning_rate": 3.672865595942519e-05, "loss": 111.6882, "step": 2198, "task_loss": 2.3574817180633545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9970919289923212, "compression/movement_sparsity/importance_threshold": -1.3327316273075047e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8955073146660012, "compression/movement_sparsity/model_sparsity": 0.8647438835044936, "compression_loss": 106.83429718017578, "distillation_loss": 5.746835708618164, "epoch": 1.86, "learning_rate": 3.6722618041299364e-05, "loss": 111.37, "step": 2199, "task_loss": 2.712643623352051 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9971432466981225, "compression/movement_sparsity/importance_threshold": -1.3092133812325074e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8956522648477827, "compression/movement_sparsity/model_sparsity": 0.8648838542016121, "compression_loss": 106.83914184570312, "distillation_loss": 5.520914077758789, "epoch": 1.86, "learning_rate": 3.671658012317353e-05, "loss": 112.9341, "step": 2200, "task_loss": 3.05460524559021 }, { "compression/movement_sparsity/importance_regularization_factor": 0.997193957100926, "compression/movement_sparsity/importance_threshold": -1.285973454328692e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8957424711759481, "compression/movement_sparsity/model_sparsity": 0.8649709616648977, "compression_loss": 106.84388732910156, "distillation_loss": 6.240145683288574, "epoch": 1.86, "learning_rate": 3.67105422050477e-05, "loss": 111.9227, "step": 2201, "task_loss": 2.9775376319885254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9972440638156304, "compression/movement_sparsity/importance_threshold": -1.2630101899342716e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8958191793463496, "compression/movement_sparsity/model_sparsity": 0.8650450346736641, "compression_loss": 106.84859466552734, "distillation_loss": 4.504105567932129, "epoch": 1.86, "learning_rate": 3.670450428692187e-05, "loss": 111.469, "step": 2202, "task_loss": 2.0252878665924072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9972935704571345, "compression/movement_sparsity/importance_threshold": -1.240321931387546e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8959416763204728, "compression/movement_sparsity/model_sparsity": 0.8651633234998813, "compression_loss": 106.85321807861328, "distillation_loss": 5.824139595031738, "epoch": 1.86, "learning_rate": 3.669846636879604e-05, "loss": 112.4263, "step": 2203, "task_loss": 3.3804736137390137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9973424806403371, "compression/movement_sparsity/importance_threshold": -1.2179070220269018e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8960155823114798, "compression/movement_sparsity/model_sparsity": 0.8652346905927359, "compression_loss": 106.85782623291016, "distillation_loss": 5.877677917480469, "epoch": 1.86, "learning_rate": 3.669242845067021e-05, "loss": 112.3106, "step": 2204, "task_loss": 4.084338188171387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9973907979801372, "compression/movement_sparsity/importance_threshold": -1.1957638051903786e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8961397725174074, "compression/movement_sparsity/model_sparsity": 0.8653546144830361, "compression_loss": 106.86234283447266, "distillation_loss": 6.613409519195557, "epoch": 1.86, "learning_rate": 3.668639053254438e-05, "loss": 112.3142, "step": 2205, "task_loss": 3.7813472747802734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9974385260914334, "compression/movement_sparsity/importance_threshold": -1.1738906242164497e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8961429204976632, "compression/movement_sparsity/model_sparsity": 0.8653576543204858, "compression_loss": 106.86683654785156, "distillation_loss": 6.924864292144775, "epoch": 1.86, "learning_rate": 3.668035261441855e-05, "loss": 112.3086, "step": 2206, "task_loss": 3.786665439605713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9974856685891245, "compression/movement_sparsity/importance_threshold": -1.1522858224433281e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8962792972029147, "compression/movement_sparsity/model_sparsity": 0.865489346066368, "compression_loss": 106.87127685546875, "distillation_loss": 5.609718322753906, "epoch": 1.87, "learning_rate": 3.667431469629272e-05, "loss": 112.4837, "step": 2207, "task_loss": 3.3107547760009766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9975322290881093, "compression/movement_sparsity/importance_threshold": -1.1309477432091403e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8963595230027687, "compression/movement_sparsity/model_sparsity": 0.8655668158631937, "compression_loss": 106.87555694580078, "distillation_loss": 6.640259742736816, "epoch": 1.87, "learning_rate": 3.666827677816689e-05, "loss": 112.8834, "step": 2208, "task_loss": 2.3808703422546387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9975782112032866, "compression/movement_sparsity/importance_threshold": -1.1098747298523594e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8964087698151048, "compression/movement_sparsity/model_sparsity": 0.8656143708960251, "compression_loss": 106.8798828125, "distillation_loss": 6.350893497467041, "epoch": 1.87, "learning_rate": 3.666223886004106e-05, "loss": 112.5555, "step": 2209, "task_loss": 3.678065538406372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9976236185495553, "compression/movement_sparsity/importance_threshold": -1.0890651257111118e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8965587519956287, "compression/movement_sparsity/model_sparsity": 0.8657592007272489, "compression_loss": 106.88410949707031, "distillation_loss": 5.521470069885254, "epoch": 1.87, "learning_rate": 3.665620094191523e-05, "loss": 112.3853, "step": 2210, "task_loss": 2.7357516288757324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9976684547418141, "compression/movement_sparsity/importance_threshold": -1.0685172741237842e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8965528495326489, "compression/movement_sparsity/model_sparsity": 0.8657535010320305, "compression_loss": 106.88822937011719, "distillation_loss": 5.849888324737549, "epoch": 1.87, "learning_rate": 3.66501630237894e-05, "loss": 112.1552, "step": 2211, "task_loss": 2.3647103309631348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9977127233949618, "compression/movement_sparsity/importance_threshold": -1.0482295184285027e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.896606126713646, "compression/movement_sparsity/model_sparsity": 0.8658049479779606, "compression_loss": 106.8923110961914, "distillation_loss": 5.078058242797852, "epoch": 1.87, "learning_rate": 3.664412510566357e-05, "loss": 112.4698, "step": 2212, "task_loss": 2.6311631202697754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9977564281238972, "compression/movement_sparsity/importance_threshold": -1.028200201963654e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.89664509489348, "compression/movement_sparsity/model_sparsity": 0.8658425774809372, "compression_loss": 106.89628601074219, "distillation_loss": 4.347623825073242, "epoch": 1.87, "learning_rate": 3.663808718753774e-05, "loss": 110.9768, "step": 2213, "task_loss": 2.662334680557251 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9977995725435191, "compression/movement_sparsity/importance_threshold": -1.0084276680673646e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.896675775776807, "compression/movement_sparsity/model_sparsity": 0.8658722043815364, "compression_loss": 106.90020751953125, "distillation_loss": 4.829845905303955, "epoch": 1.87, "learning_rate": 3.663204926941191e-05, "loss": 111.4318, "step": 2214, "task_loss": 2.688380479812622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9978421602687264, "compression/movement_sparsity/importance_threshold": -9.889102600781075e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.896751160364601, "compression/movement_sparsity/model_sparsity": 0.8659449992768296, "compression_loss": 106.90409088134766, "distillation_loss": 6.038101673126221, "epoch": 1.87, "learning_rate": 3.662601135128608e-05, "loss": 111.9896, "step": 2215, "task_loss": 2.5386788845062256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9978841949144177, "compression/movement_sparsity/importance_threshold": -9.696463213339224e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8967632156980808, "compression/movement_sparsity/model_sparsity": 0.8659566404725179, "compression_loss": 106.90782928466797, "distillation_loss": 6.667707920074463, "epoch": 1.87, "learning_rate": 3.6619973433160246e-05, "loss": 112.4626, "step": 2216, "task_loss": 2.999779462814331 }, { "compression/movement_sparsity/importance_regularization_factor": 0.997925680095492, "compression/movement_sparsity/importance_threshold": -9.506341951732826e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8968590383092027, "compression/movement_sparsity/model_sparsity": 0.8660491712821627, "compression_loss": 106.9115982055664, "distillation_loss": 5.999267101287842, "epoch": 1.87, "learning_rate": 3.661393551503442e-05, "loss": 111.6834, "step": 2217, "task_loss": 3.0775017738342285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9979666194268478, "compression/movement_sparsity/importance_threshold": -9.318722249343145e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8969910388449316, "compression/movement_sparsity/model_sparsity": 0.8661766371934083, "compression_loss": 106.91527557373047, "distillation_loss": 5.044110298156738, "epoch": 1.87, "learning_rate": 3.660789759690859e-05, "loss": 112.1351, "step": 2218, "task_loss": 3.0003836154937744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9980070165233843, "compression/movement_sparsity/importance_threshold": -9.133587539553178e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8970885427636901, "compression/movement_sparsity/model_sparsity": 0.8662707915526003, "compression_loss": 106.91886901855469, "distillation_loss": 4.0648298263549805, "epoch": 1.88, "learning_rate": 3.6601859678782755e-05, "loss": 112.2128, "step": 2219, "task_loss": 2.076323986053467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998046875, "compression/movement_sparsity/importance_threshold": -8.950921255745925e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8971184605002884, "compression/movement_sparsity/model_sparsity": 0.8662996815229088, "compression_loss": 106.9223861694336, "distillation_loss": 5.376125335693359, "epoch": 1.88, "learning_rate": 3.659582176065693e-05, "loss": 112.0173, "step": 2220, "task_loss": 2.621419906616211 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9980861984715937, "compression/movement_sparsity/importance_threshold": -8.770706831304383e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8972004868494555, "compression/movement_sparsity/model_sparsity": 0.8663788900146394, "compression_loss": 106.9259262084961, "distillation_loss": 6.086724281311035, "epoch": 1.88, "learning_rate": 3.6589783842531096e-05, "loss": 112.5079, "step": 2221, "task_loss": 2.6275954246520996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9981249905530645, "compression/movement_sparsity/importance_threshold": -8.592927699609815e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8972783874366207, "compression/movement_sparsity/model_sparsity": 0.8664541144769852, "compression_loss": 106.92936706542969, "distillation_loss": 5.2588911056518555, "epoch": 1.88, "learning_rate": 3.658374592440526e-05, "loss": 112.0885, "step": 2222, "task_loss": 3.0171077251434326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9981632548593108, "compression/movement_sparsity/importance_threshold": -8.417567294046087e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8972463948948537, "compression/movement_sparsity/model_sparsity": 0.8664232209774484, "compression_loss": 106.93275451660156, "distillation_loss": 4.673735618591309, "epoch": 1.88, "learning_rate": 3.657770800627944e-05, "loss": 111.8331, "step": 2223, "task_loss": 3.2889137268066406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9982009950052316, "compression/movement_sparsity/importance_threshold": -8.24460904799533e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8972981815548962, "compression/movement_sparsity/model_sparsity": 0.8664732286064042, "compression_loss": 106.9361343383789, "distillation_loss": 5.215034008026123, "epoch": 1.88, "learning_rate": 3.657167008815361e-05, "loss": 111.9634, "step": 2224, "task_loss": 3.61072039604187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9982382146057258, "compression/movement_sparsity/importance_threshold": -8.074036394839675e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8973176537206455, "compression/movement_sparsity/model_sparsity": 0.8664920318433567, "compression_loss": 106.93938446044922, "distillation_loss": 7.03035306930542, "epoch": 1.88, "learning_rate": 3.656563217002778e-05, "loss": 112.6237, "step": 2225, "task_loss": 3.01326060295105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998274917275692, "compression/movement_sparsity/importance_threshold": -7.905832767963854e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8972502940976705, "compression/movement_sparsity/model_sparsity": 0.8664269862306533, "compression_loss": 106.94270324707031, "distillation_loss": 4.6075439453125, "epoch": 1.88, "learning_rate": 3.6559594251901945e-05, "loss": 112.0647, "step": 2226, "task_loss": 2.234020948410034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998311106630029, "compression/movement_sparsity/importance_threshold": -7.739981600748264e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8973342998586652, "compression/movement_sparsity/model_sparsity": 0.8665081061353258, "compression_loss": 106.94590759277344, "distillation_loss": 4.90473747253418, "epoch": 1.88, "learning_rate": 3.655355633377612e-05, "loss": 111.9931, "step": 2227, "task_loss": 2.1177470684051514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9983467862836357, "compression/movement_sparsity/importance_threshold": -7.5764663265767695e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8973871477696274, "compression/movement_sparsity/model_sparsity": 0.8665591385579672, "compression_loss": 106.94906616210938, "distillation_loss": 5.844444751739502, "epoch": 1.88, "learning_rate": 3.6547518415650287e-05, "loss": 112.9195, "step": 2228, "task_loss": 4.044680595397949 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9983819598514109, "compression/movement_sparsity/importance_threshold": -7.41527037883237e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8975976808734062, "compression/movement_sparsity/model_sparsity": 0.8667624392019556, "compression_loss": 106.9521713256836, "distillation_loss": 4.34262228012085, "epoch": 1.88, "learning_rate": 3.6541480497524454e-05, "loss": 112.206, "step": 2229, "task_loss": 2.3375632762908936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9984166309482533, "compression/movement_sparsity/importance_threshold": -7.256377190896328e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8975973827692153, "compression/movement_sparsity/model_sparsity": 0.8667621513385607, "compression_loss": 106.95518493652344, "distillation_loss": 5.725849151611328, "epoch": 1.88, "learning_rate": 3.653544257939863e-05, "loss": 112.7476, "step": 2230, "task_loss": 2.972702980041504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9984508031890618, "compression/movement_sparsity/importance_threshold": -7.099770196152509e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8975813447637451, "compression/movement_sparsity/model_sparsity": 0.8667466642879171, "compression_loss": 106.9581527709961, "distillation_loss": 6.862532138824463, "epoch": 1.89, "learning_rate": 3.6529404661272795e-05, "loss": 112.6943, "step": 2231, "task_loss": 3.1192219257354736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9984844801887351, "compression/movement_sparsity/importance_threshold": -6.945432827983045e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8976092592401806, "compression/movement_sparsity/model_sparsity": 0.8667736198162121, "compression_loss": 106.96112823486328, "distillation_loss": 5.918412208557129, "epoch": 1.89, "learning_rate": 3.652336674314696e-05, "loss": 112.7787, "step": 2232, "task_loss": 2.6841914653778076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985176655621721, "compression/movement_sparsity/importance_threshold": -6.793348519770932e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8977634029552094, "compression/movement_sparsity/model_sparsity": 0.8669224682204282, "compression_loss": 106.9639892578125, "distillation_loss": 4.973785400390625, "epoch": 1.89, "learning_rate": 3.6517328825021136e-05, "loss": 112.3501, "step": 2233, "task_loss": 2.6597437858581543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985503629242715, "compression/movement_sparsity/importance_threshold": -6.64350070489917e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8979212431622053, "compression/movement_sparsity/model_sparsity": 0.8670748861307405, "compression_loss": 106.96680450439453, "distillation_loss": 5.590056896209717, "epoch": 1.89, "learning_rate": 3.651129090689531e-05, "loss": 111.9599, "step": 2234, "task_loss": 2.4562201499938965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985825758899322, "compression/movement_sparsity/importance_threshold": -6.495872816749022e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8980324002529069, "compression/movement_sparsity/model_sparsity": 0.8671822246334172, "compression_loss": 106.9695816040039, "distillation_loss": 5.404831886291504, "epoch": 1.89, "learning_rate": 3.650525298876947e-05, "loss": 112.8416, "step": 2235, "task_loss": 2.385239362716675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986143080740528, "compression/movement_sparsity/importance_threshold": -6.350448288704354e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.898110002735881, "compression/movement_sparsity/model_sparsity": 0.8672571612323681, "compression_loss": 106.97232055664062, "distillation_loss": 6.837233543395996, "epoch": 1.89, "learning_rate": 3.6499215070643644e-05, "loss": 112.1945, "step": 2236, "task_loss": 3.59403395652771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986455630915324, "compression/movement_sparsity/importance_threshold": -6.207210554147295e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8982244032001795, "compression/movement_sparsity/model_sparsity": 0.8673676316887808, "compression_loss": 106.9749526977539, "distillation_loss": 5.515789031982422, "epoch": 1.89, "learning_rate": 3.649317715251782e-05, "loss": 112.2925, "step": 2237, "task_loss": 3.1063570976257324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986763445572696, "compression/movement_sparsity/importance_threshold": -6.066143046460845e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.898298654992048, "compression/movement_sparsity/model_sparsity": 0.8674393327031735, "compression_loss": 106.97761535644531, "distillation_loss": 5.791074752807617, "epoch": 1.89, "learning_rate": 3.648713923439198e-05, "loss": 112.6485, "step": 2238, "task_loss": 3.265953540802002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987066560861632, "compression/movement_sparsity/importance_threshold": -5.9272291990280015e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8984400636960417, "compression/movement_sparsity/model_sparsity": 0.867575883583161, "compression_loss": 106.98016357421875, "distillation_loss": 5.674151420593262, "epoch": 1.89, "learning_rate": 3.648110131626615e-05, "loss": 112.6138, "step": 2239, "task_loss": 3.2891762256622314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998736501293112, "compression/movement_sparsity/importance_threshold": -5.790452445230895e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8985261919588755, "compression/movement_sparsity/model_sparsity": 0.867659053075205, "compression_loss": 106.98271179199219, "distillation_loss": 5.513476371765137, "epoch": 1.89, "learning_rate": 3.647506339814033e-05, "loss": 112.8992, "step": 2240, "task_loss": 2.9669833183288574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987658837930148, "compression/movement_sparsity/importance_threshold": -5.6557962184525246e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8985700609716079, "compression/movement_sparsity/model_sparsity": 0.8677014150523931, "compression_loss": 106.98522186279297, "distillation_loss": 7.039402961730957, "epoch": 1.89, "learning_rate": 3.646902548001449e-05, "loss": 112.826, "step": 2241, "task_loss": 2.7848525047302246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987948072007704, "compression/movement_sparsity/importance_threshold": -5.52324395207502e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8986088860614303, "compression/movement_sparsity/model_sparsity": 0.8677389063809401, "compression_loss": 106.98765563964844, "distillation_loss": 4.842825412750244, "epoch": 1.89, "learning_rate": 3.646298756188866e-05, "loss": 112.022, "step": 2242, "task_loss": 2.7310962677001953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988232751312777, "compression/movement_sparsity/importance_threshold": -5.39277907948138e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8986927845049162, "compression/movement_sparsity/model_sparsity": 0.8678199226547905, "compression_loss": 106.99004364013672, "distillation_loss": 6.430970191955566, "epoch": 1.9, "learning_rate": 3.6456949643762835e-05, "loss": 112.4202, "step": 2243, "task_loss": 3.2902655601501465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988512911994354, "compression/movement_sparsity/importance_threshold": -5.2643850340537354e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8987840640081687, "compression/movement_sparsity/model_sparsity": 0.8679080664262977, "compression_loss": 106.99234008789062, "distillation_loss": 5.517462253570557, "epoch": 1.9, "learning_rate": 3.6450911725637e-05, "loss": 113.1874, "step": 2244, "task_loss": 2.6869266033172607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988788590201423, "compression/movement_sparsity/importance_threshold": -5.1380452491759515e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.898798563796014, "compression/movement_sparsity/model_sparsity": 0.8679220681018238, "compression_loss": 106.9946517944336, "distillation_loss": 5.463531970977783, "epoch": 1.9, "learning_rate": 3.644487380751117e-05, "loss": 112.0247, "step": 2245, "task_loss": 2.679300546646118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989059822082972, "compression/movement_sparsity/importance_threshold": -5.013743158229292e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8988369596158015, "compression/movement_sparsity/model_sparsity": 0.8679591449070823, "compression_loss": 106.99685668945312, "distillation_loss": 5.243215084075928, "epoch": 1.9, "learning_rate": 3.643883588938534e-05, "loss": 112.9402, "step": 2246, "task_loss": 3.302549123764038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989326643787988, "compression/movement_sparsity/importance_threshold": -4.891462194598489e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8988820687419679, "compression/movement_sparsity/model_sparsity": 0.868002704395993, "compression_loss": 106.99903106689453, "distillation_loss": 4.323856830596924, "epoch": 1.9, "learning_rate": 3.643279797125951e-05, "loss": 111.28, "step": 2247, "task_loss": 2.584294557571411 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989589091465461, "compression/movement_sparsity/importance_threshold": -4.77118579166394e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8989673503888995, "compression/movement_sparsity/model_sparsity": 0.8680850563559955, "compression_loss": 107.00108337402344, "distillation_loss": 5.605971336364746, "epoch": 1.9, "learning_rate": 3.642676005313368e-05, "loss": 112.6995, "step": 2248, "task_loss": 3.639742612838745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989847201264378, "compression/movement_sparsity/importance_threshold": -4.652897382808642e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8990514038465648, "compression/movement_sparsity/model_sparsity": 0.8681662223188112, "compression_loss": 107.003173828125, "distillation_loss": 8.125031471252441, "epoch": 1.9, "learning_rate": 3.642072213500785e-05, "loss": 113.1338, "step": 2249, "task_loss": 3.7542710304260254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990101009333726, "compression/movement_sparsity/importance_threshold": -4.536580401417328e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8991845491023868, "compression/movement_sparsity/model_sparsity": 0.8682947936254931, "compression_loss": 107.00532531738281, "distillation_loss": 5.504183769226074, "epoch": 1.9, "learning_rate": 3.6414684216882026e-05, "loss": 112.3089, "step": 2250, "task_loss": 3.5008509159088135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990350551822496, "compression/movement_sparsity/importance_threshold": -4.422218280870395e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8992241492631056, "compression/movement_sparsity/model_sparsity": 0.8683330333988668, "compression_loss": 107.00731658935547, "distillation_loss": 5.269132614135742, "epoch": 1.9, "learning_rate": 3.6408646298756186e-05, "loss": 112.3358, "step": 2251, "task_loss": 2.2203989028930664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990595864879671, "compression/movement_sparsity/importance_threshold": -4.3097944545517075e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8993004997084779, "compression/movement_sparsity/model_sparsity": 0.8684067609715592, "compression_loss": 107.00928497314453, "distillation_loss": 5.770247936248779, "epoch": 1.9, "learning_rate": 3.640260838063036e-05, "loss": 113.0978, "step": 2252, "task_loss": 2.525223731994629 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990836984654243, "compression/movement_sparsity/importance_threshold": -4.199292355843397e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8994640396676038, "compression/movement_sparsity/model_sparsity": 0.8685646828299813, "compression_loss": 107.01116943359375, "distillation_loss": 6.400417327880859, "epoch": 1.9, "learning_rate": 3.6396570462504534e-05, "loss": 112.2777, "step": 2253, "task_loss": 3.6495871543884277 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991073947295199, "compression/movement_sparsity/importance_threshold": -4.090695418127593e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8995950504974191, "compression/movement_sparsity/model_sparsity": 0.868691193034756, "compression_loss": 107.01303100585938, "distillation_loss": 4.188664436340332, "epoch": 1.9, "learning_rate": 3.63905325443787e-05, "loss": 111.029, "step": 2254, "task_loss": 1.3049914836883545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991306788951527, "compression/movement_sparsity/importance_threshold": -3.983987074788163e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8996322658246105, "compression/movement_sparsity/model_sparsity": 0.8687271299009708, "compression_loss": 107.01484680175781, "distillation_loss": 5.125716209411621, "epoch": 1.91, "learning_rate": 3.638449462625287e-05, "loss": 111.99, "step": 2255, "task_loss": 2.465350389480591 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991535545772214, "compression/movement_sparsity/importance_threshold": -3.879150759208104e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8997224960011111, "compression/movement_sparsity/model_sparsity": 0.868814260393328, "compression_loss": 107.01659393310547, "distillation_loss": 4.767354965209961, "epoch": 1.91, "learning_rate": 3.637845670812704e-05, "loss": 111.6338, "step": 2256, "task_loss": 2.872187614440918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999176025390625, "compression/movement_sparsity/importance_threshold": -3.776169904767812e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8998213711991477, "compression/movement_sparsity/model_sparsity": 0.8689097389241364, "compression_loss": 107.018310546875, "distillation_loss": 5.490401268005371, "epoch": 1.91, "learning_rate": 3.637241879000121e-05, "loss": 112.5626, "step": 2257, "task_loss": 2.067721128463745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991980949502621, "compression/movement_sparsity/importance_threshold": -3.6750279448520204e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8999035048658235, "compression/movement_sparsity/model_sparsity": 0.8689890510466891, "compression_loss": 107.02001190185547, "distillation_loss": 5.096080303192139, "epoch": 1.91, "learning_rate": 3.6366380871875377e-05, "loss": 111.7904, "step": 2258, "task_loss": 2.241391181945801 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992197668710315, "compression/movement_sparsity/importance_threshold": -3.5757083128428596e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.900003071665583, "compression/movement_sparsity/model_sparsity": 0.8690851974205736, "compression_loss": 107.02166748046875, "distillation_loss": 5.889667510986328, "epoch": 1.91, "learning_rate": 3.636034295374955e-05, "loss": 111.9997, "step": 2259, "task_loss": 2.6739649772644043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992410447678322, "compression/movement_sparsity/importance_threshold": -3.4781944421224606e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9000627402004329, "compression/movement_sparsity/model_sparsity": 0.8691428161576894, "compression_loss": 107.02323913574219, "distillation_loss": 4.699143886566162, "epoch": 1.91, "learning_rate": 3.635430503562372e-05, "loss": 112.759, "step": 2260, "task_loss": 2.8270082473754883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992619322555627, "compression/movement_sparsity/importance_threshold": -3.382469766074689e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9000875186207802, "compression/movement_sparsity/model_sparsity": 0.8691667433630705, "compression_loss": 107.02485656738281, "distillation_loss": 5.718186378479004, "epoch": 1.91, "learning_rate": 3.6348267117497885e-05, "loss": 112.5832, "step": 2261, "task_loss": 3.48427677154541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999282432949122, "compression/movement_sparsity/importance_threshold": -3.288517718080808e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.900059461054333, "compression/movement_sparsity/model_sparsity": 0.869139649660346, "compression_loss": 107.02635955810547, "distillation_loss": 5.027402877807617, "epoch": 1.91, "learning_rate": 3.634222919937206e-05, "loss": 111.6935, "step": 2262, "task_loss": 2.622799873352051 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993025504634088, "compression/movement_sparsity/importance_threshold": -3.196321731524683e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9001157908222448, "compression/movement_sparsity/model_sparsity": 0.8691940443274394, "compression_loss": 107.02794647216797, "distillation_loss": 6.319218158721924, "epoch": 1.91, "learning_rate": 3.6336191281246226e-05, "loss": 112.0182, "step": 2263, "task_loss": 3.0334315299987793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999322288413322, "compression/movement_sparsity/importance_threshold": -3.1058652397884454e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9002134378310149, "compression/movement_sparsity/model_sparsity": 0.8692883368610609, "compression_loss": 107.0293960571289, "distillation_loss": 7.104345798492432, "epoch": 1.91, "learning_rate": 3.63301533631204e-05, "loss": 112.4066, "step": 2264, "task_loss": 3.809741735458374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993416504137603, "compression/movement_sparsity/importance_threshold": -3.0171316762542255e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.900369763668721, "compression/movement_sparsity/model_sparsity": 0.8694392924253272, "compression_loss": 107.03082275390625, "distillation_loss": 4.702703475952148, "epoch": 1.91, "learning_rate": 3.632411544499457e-05, "loss": 112.317, "step": 2265, "task_loss": 3.14158034324646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993606400796227, "compression/movement_sparsity/importance_threshold": -2.9301044743050217e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9004308034828491, "compression/movement_sparsity/model_sparsity": 0.8694982353340596, "compression_loss": 107.03215789794922, "distillation_loss": 5.008666038513184, "epoch": 1.91, "learning_rate": 3.631807752686874e-05, "loss": 112.2165, "step": 2266, "task_loss": 3.2560806274414062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993792610258077, "compression/movement_sparsity/importance_threshold": -2.8447670673246994e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9005268645773236, "compression/movement_sparsity/model_sparsity": 0.8695909964344204, "compression_loss": 107.03351593017578, "distillation_loss": 4.88998556137085, "epoch": 1.92, "learning_rate": 3.631203960874291e-05, "loss": 111.5212, "step": 2267, "task_loss": 1.9137036800384521 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993975168672142, "compression/movement_sparsity/importance_threshold": -2.7611028886936548e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9006011998383655, "compression/movement_sparsity/model_sparsity": 0.8696627780505636, "compression_loss": 107.03478240966797, "distillation_loss": 6.017104625701904, "epoch": 1.92, "learning_rate": 3.6306001690617076e-05, "loss": 111.9164, "step": 2268, "task_loss": 2.3951497077941895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999415411218741, "compression/movement_sparsity/importance_threshold": -2.679095371797488e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9007084338779149, "compression/movement_sparsity/model_sparsity": 0.8697663282709638, "compression_loss": 107.03600311279297, "distillation_loss": 5.582961082458496, "epoch": 1.92, "learning_rate": 3.629996377249125e-05, "loss": 112.4396, "step": 2269, "task_loss": 2.6919801235198975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999432947695287, "compression/movement_sparsity/importance_threshold": -2.598727950015728e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9007312090380993, "compression/movement_sparsity/model_sparsity": 0.8697883210343315, "compression_loss": 107.03724670410156, "distillation_loss": 4.071794033050537, "epoch": 1.92, "learning_rate": 3.629392585436542e-05, "loss": 111.5241, "step": 2270, "task_loss": 2.754196882247925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999450129911751, "compression/movement_sparsity/importance_threshold": -2.5199840567331072e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9008836475971562, "compression/movement_sparsity/model_sparsity": 0.8699355228599288, "compression_loss": 107.03833770751953, "distillation_loss": 5.514983177185059, "epoch": 1.92, "learning_rate": 3.6287887936239584e-05, "loss": 111.3296, "step": 2271, "task_loss": 2.3901877403259277 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994669614830316, "compression/movement_sparsity/importance_threshold": -2.442847125331757e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9008893115767832, "compression/movement_sparsity/model_sparsity": 0.8699409922644312, "compression_loss": 107.0394515991211, "distillation_loss": 6.478150367736816, "epoch": 1.92, "learning_rate": 3.628185001811376e-05, "loss": 112.8791, "step": 2272, "task_loss": 2.153325080871582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994834460240277, "compression/movement_sparsity/importance_threshold": -2.367300589194675e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.900928613633311, "compression/movement_sparsity/model_sparsity": 0.86997894417441, "compression_loss": 107.04049682617188, "distillation_loss": 6.111791610717773, "epoch": 1.92, "learning_rate": 3.6275812099987925e-05, "loss": 112.3905, "step": 2273, "task_loss": 2.4266247749328613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994995871496383, "compression/movement_sparsity/importance_threshold": -2.293327881703125e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9009375925315408, "compression/movement_sparsity/model_sparsity": 0.8699876146198633, "compression_loss": 107.04154205322266, "distillation_loss": 5.764986038208008, "epoch": 1.92, "learning_rate": 3.62697741818621e-05, "loss": 112.3202, "step": 2274, "task_loss": 2.614631175994873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995153884747618, "compression/movement_sparsity/importance_threshold": -2.2209124362409724e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.901015099621174, "compression/movement_sparsity/model_sparsity": 0.8700624591025279, "compression_loss": 107.04255676269531, "distillation_loss": 5.8204522132873535, "epoch": 1.92, "learning_rate": 3.6263736263736266e-05, "loss": 112.7249, "step": 2275, "task_loss": 3.109081745147705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995308536142973, "compression/movement_sparsity/importance_threshold": -2.1500376861912154e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9011148214351127, "compression/movement_sparsity/model_sparsity": 0.8701587551653776, "compression_loss": 107.04348754882812, "distillation_loss": 5.115758419036865, "epoch": 1.92, "learning_rate": 3.625769834561043e-05, "loss": 112.2111, "step": 2276, "task_loss": 2.591214656829834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995459861831435, "compression/movement_sparsity/importance_threshold": -2.080687064935985e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9011724151647938, "compression/movement_sparsity/model_sparsity": 0.8702143703732652, "compression_loss": 107.04436492919922, "distillation_loss": 5.734438419342041, "epoch": 1.92, "learning_rate": 3.625166042748461e-05, "loss": 112.5513, "step": 2277, "task_loss": 2.20890736579895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995607897961991, "compression/movement_sparsity/importance_threshold": -2.012844005858279e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9012561585941006, "compression/movement_sparsity/model_sparsity": 0.8702952369581503, "compression_loss": 107.0452651977539, "distillation_loss": 5.212594985961914, "epoch": 1.93, "learning_rate": 3.6245622509358774e-05, "loss": 112.2994, "step": 2278, "task_loss": 2.7288711071014404 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995752680683632, "compression/movement_sparsity/importance_threshold": -1.946491942339361e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9013370879198451, "compression/movement_sparsity/model_sparsity": 0.8703733861125879, "compression_loss": 107.04608154296875, "distillation_loss": 6.174812316894531, "epoch": 1.93, "learning_rate": 3.623958459123294e-05, "loss": 112.3976, "step": 2279, "task_loss": 3.9466781616210938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995894246145341, "compression/movement_sparsity/importance_threshold": -1.8816143077648312e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9013944789386766, "compression/movement_sparsity/model_sparsity": 0.870428805573367, "compression_loss": 107.0468978881836, "distillation_loss": 4.751241683959961, "epoch": 1.93, "learning_rate": 3.6233546673107116e-05, "loss": 112.8104, "step": 2280, "task_loss": 2.2461724281311035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996032630496111, "compression/movement_sparsity/importance_threshold": -1.8181945355142184e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9014801421589727, "compression/movement_sparsity/model_sparsity": 0.8705115259985149, "compression_loss": 107.04769897460938, "distillation_loss": 5.029261589050293, "epoch": 1.93, "learning_rate": 3.622750875498128e-05, "loss": 112.0709, "step": 2281, "task_loss": 3.2090063095092773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996167869884928, "compression/movement_sparsity/importance_threshold": -1.7562160589713882e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.901438872614785, "compression/movement_sparsity/model_sparsity": 0.87047167419013, "compression_loss": 107.04843139648438, "distillation_loss": 6.6411285400390625, "epoch": 1.93, "learning_rate": 3.622147083685546e-05, "loss": 112.6415, "step": 2282, "task_loss": 3.6849865913391113 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996300000460779, "compression/movement_sparsity/importance_threshold": -1.6956623115193387e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9015415516222973, "compression/movement_sparsity/model_sparsity": 0.8705708258578568, "compression_loss": 107.04910278320312, "distillation_loss": 4.377174377441406, "epoch": 1.93, "learning_rate": 3.6215432918729624e-05, "loss": 112.1281, "step": 2283, "task_loss": 2.8364622592926025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996429058372653, "compression/movement_sparsity/importance_threshold": -1.6365167265402006e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9015871854118398, "compression/movement_sparsity/model_sparsity": 0.8706148919863426, "compression_loss": 107.04981994628906, "distillation_loss": 5.237322807312012, "epoch": 1.93, "learning_rate": 3.62093950006038e-05, "loss": 112.1862, "step": 2284, "task_loss": 2.119804859161377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996555079769538, "compression/movement_sparsity/importance_threshold": -1.5787627374178395e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9016718708503897, "compression/movement_sparsity/model_sparsity": 0.8706966682195554, "compression_loss": 107.05050659179688, "distillation_loss": 6.341668128967285, "epoch": 1.93, "learning_rate": 3.6203357082477965e-05, "loss": 112.6506, "step": 2285, "task_loss": 3.530973196029663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996678100800421, "compression/movement_sparsity/importance_threshold": -1.5223837775335189e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9016553797265493, "compression/movement_sparsity/model_sparsity": 0.8706807436165515, "compression_loss": 107.05115509033203, "distillation_loss": 5.136467933654785, "epoch": 1.93, "learning_rate": 3.619731916435213e-05, "loss": 112.4165, "step": 2286, "task_loss": 2.652858018875122 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996798157614292, "compression/movement_sparsity/importance_threshold": -1.4673632802702369e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.901701263923612, "compression/movement_sparsity/model_sparsity": 0.8707250515502889, "compression_loss": 107.0517578125, "distillation_loss": 5.2743682861328125, "epoch": 1.93, "learning_rate": 3.6191281246226306e-05, "loss": 112.0168, "step": 2287, "task_loss": 2.848407745361328 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996915286360136, "compression/movement_sparsity/importance_threshold": -1.413684679011859e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9017613975009997, "compression/movement_sparsity/model_sparsity": 0.8707831193543007, "compression_loss": 107.0523681640625, "distillation_loss": 6.485002040863037, "epoch": 1.93, "learning_rate": 3.6185243328100473e-05, "loss": 112.7979, "step": 2288, "task_loss": 2.732428550720215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997029523186944, "compression/movement_sparsity/importance_threshold": -1.3613314071387814e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9018230215993419, "compression/movement_sparsity/model_sparsity": 0.870842626475287, "compression_loss": 107.05290985107422, "distillation_loss": 5.472529411315918, "epoch": 1.93, "learning_rate": 3.617920540997464e-05, "loss": 112.1221, "step": 2289, "task_loss": 2.602665424346924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997140904243702, "compression/movement_sparsity/importance_threshold": -1.3102868980357368e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9018973926328867, "compression/movement_sparsity/model_sparsity": 0.8709144426350375, "compression_loss": 107.05349731445312, "distillation_loss": 5.967824935913086, "epoch": 1.94, "learning_rate": 3.6173167491848815e-05, "loss": 112.6215, "step": 2290, "task_loss": 2.0356647968292236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997249465679399, "compression/movement_sparsity/importance_threshold": -1.2605345850839889e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9019129298233163, "compression/movement_sparsity/model_sparsity": 0.8709294460751777, "compression_loss": 107.0539321899414, "distillation_loss": 5.067270278930664, "epoch": 1.94, "learning_rate": 3.616712957372298e-05, "loss": 112.0386, "step": 2291, "task_loss": 3.7969939708709717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997355243643022, "compression/movement_sparsity/importance_threshold": -1.212057901667403e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9020297508936448, "compression/movement_sparsity/model_sparsity": 0.8710422539823569, "compression_loss": 107.05439758300781, "distillation_loss": 4.979169845581055, "epoch": 1.94, "learning_rate": 3.616109165559715e-05, "loss": 112.3685, "step": 2292, "task_loss": 2.7683141231536865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997458274283559, "compression/movement_sparsity/importance_threshold": -1.16484028116811e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9021075918599717, "compression/movement_sparsity/model_sparsity": 0.8711174208720237, "compression_loss": 107.05481719970703, "distillation_loss": 5.8390212059021, "epoch": 1.94, "learning_rate": 3.615505373747132e-05, "loss": 111.9735, "step": 2293, "task_loss": 3.065803050994873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999755859375, "compression/movement_sparsity/importance_threshold": -1.1188651569682406e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.902279013693905, "compression/movement_sparsity/model_sparsity": 0.871282953838606, "compression_loss": 107.05518341064453, "distillation_loss": 6.423513412475586, "epoch": 1.94, "learning_rate": 3.614901581934549e-05, "loss": 112.8502, "step": 2294, "task_loss": 3.085240125656128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999765623819133, "compression/movement_sparsity/importance_threshold": -1.0741159624516605e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9023402919913855, "compression/movement_sparsity/model_sparsity": 0.8713421270380541, "compression_loss": 107.05558776855469, "distillation_loss": 5.229470252990723, "epoch": 1.94, "learning_rate": 3.614297790121966e-05, "loss": 111.9301, "step": 2295, "task_loss": 2.5372400283813477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999775124375654, "compression/movement_sparsity/importance_threshold": -1.030576130999633e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9023823485306373, "compression/movement_sparsity/model_sparsity": 0.8713827388058014, "compression_loss": 107.05587768554688, "distillation_loss": 5.267242431640625, "epoch": 1.94, "learning_rate": 3.613693998309383e-05, "loss": 112.5745, "step": 2296, "task_loss": 4.02814245223999 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997843646594615, "compression/movement_sparsity/importance_threshold": -9.882290959951565e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9024194684644877, "compression/movement_sparsity/model_sparsity": 0.8714185835557299, "compression_loss": 107.05618286132812, "distillation_loss": 4.727000713348389, "epoch": 1.94, "learning_rate": 3.6130902064968005e-05, "loss": 111.924, "step": 2297, "task_loss": 2.965517044067383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997933482854545, "compression/movement_sparsity/importance_threshold": -9.470582908220962e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9023345206942498, "compression/movement_sparsity/model_sparsity": 0.8713365540027296, "compression_loss": 107.05644989013672, "distillation_loss": 7.024869918823242, "epoch": 1.94, "learning_rate": 3.6124864146842166e-05, "loss": 113.0193, "step": 2298, "task_loss": 3.528736114501953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998020788685317, "compression/movement_sparsity/importance_threshold": -9.070471488617157e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9024426609705395, "compression/movement_sparsity/model_sparsity": 0.8714409793278503, "compression_loss": 107.05673217773438, "distillation_loss": 4.63880729675293, "epoch": 1.94, "learning_rate": 3.611882622871634e-05, "loss": 112.4872, "step": 2299, "task_loss": 2.218899965286255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998105600235919, "compression/movement_sparsity/importance_threshold": -8.681791034978806e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9024259552116817, "compression/movement_sparsity/model_sparsity": 0.8714248474632021, "compression_loss": 107.05695343017578, "distillation_loss": 5.79630184173584, "epoch": 1.94, "learning_rate": 3.6112788310590513e-05, "loss": 113.2047, "step": 2300, "task_loss": 3.9030065536499023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999818795365534, "compression/movement_sparsity/importance_threshold": -8.304375881118542e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9025564533022885, "compression/movement_sparsity/model_sparsity": 0.8715508625429376, "compression_loss": 107.05724334716797, "distillation_loss": 6.362673759460449, "epoch": 1.94, "learning_rate": 3.610675039246468e-05, "loss": 112.4321, "step": 2301, "task_loss": 2.439337730407715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998267885092567, "compression/movement_sparsity/importance_threshold": -7.938060360875021e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9025977228464762, "compression/movement_sparsity/model_sparsity": 0.8715907143513225, "compression_loss": 107.05741119384766, "distillation_loss": 4.949580669403076, "epoch": 1.95, "learning_rate": 3.610071247433885e-05, "loss": 111.7335, "step": 2302, "task_loss": 2.022193431854248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998345430696587, "compression/movement_sparsity/importance_threshold": -7.582678808078225e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.902679498788123, "compression/movement_sparsity/model_sparsity": 0.8716696810378014, "compression_loss": 107.0576171875, "distillation_loss": 4.77168083190918, "epoch": 1.95, "learning_rate": 3.609467455621302e-05, "loss": 111.8537, "step": 2303, "task_loss": 3.167419672012329 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999842062661639, "compression/movement_sparsity/importance_threshold": -7.238065556540788e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9027466199277453, "compression/movement_sparsity/model_sparsity": 0.871734496359789, "compression_loss": 107.05780792236328, "distillation_loss": 4.209288597106934, "epoch": 1.95, "learning_rate": 3.608863663808719e-05, "loss": 111.731, "step": 2304, "task_loss": 3.2324163913726807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998493509000963, "compression/movement_sparsity/importance_threshold": -6.904054940092691e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9028567515400301, "compression/movement_sparsity/model_sparsity": 0.8718408446123873, "compression_loss": 107.05793762207031, "distillation_loss": 5.1611647605896, "epoch": 1.95, "learning_rate": 3.6082598719961356e-05, "loss": 111.7316, "step": 2305, "task_loss": 2.880117893218994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998564113999294, "compression/movement_sparsity/importance_threshold": -6.580481292563917e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9029188287567425, "compression/movement_sparsity/model_sparsity": 0.8719007892857337, "compression_loss": 107.0580825805664, "distillation_loss": 7.996528625488281, "epoch": 1.95, "learning_rate": 3.607656080183553e-05, "loss": 112.8186, "step": 2306, "task_loss": 3.295424222946167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998632477760372, "compression/movement_sparsity/importance_threshold": -6.267178947784446e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9029409361635393, "compression/movement_sparsity/model_sparsity": 0.8719221372350967, "compression_loss": 107.05817413330078, "distillation_loss": 5.087790489196777, "epoch": 1.95, "learning_rate": 3.6070522883709704e-05, "loss": 111.7022, "step": 2307, "task_loss": 2.896566152572632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998698636433183, "compression/movement_sparsity/importance_threshold": -5.963982239575588e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.902956795306495, "compression/movement_sparsity/model_sparsity": 0.8719374515677035, "compression_loss": 107.05825805664062, "distillation_loss": 6.733245372772217, "epoch": 1.95, "learning_rate": 3.6064484965583864e-05, "loss": 112.3669, "step": 2308, "task_loss": 3.0039451122283936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998762626166716, "compression/movement_sparsity/importance_threshold": -5.670725501767324e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9030163803721714, "compression/movement_sparsity/model_sparsity": 0.8719949897030688, "compression_loss": 107.05833435058594, "distillation_loss": 5.672686576843262, "epoch": 1.95, "learning_rate": 3.605844704745804e-05, "loss": 112.779, "step": 2309, "task_loss": 2.6441993713378906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998824483109959, "compression/movement_sparsity/importance_threshold": -5.387243068189634e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9031306615947935, "compression/movement_sparsity/model_sparsity": 0.8721053450141236, "compression_loss": 107.05831146240234, "distillation_loss": 5.272998332977295, "epoch": 1.95, "learning_rate": 3.605240912933221e-05, "loss": 112.0496, "step": 2310, "task_loss": 3.2136390209198 }, { "compression/movement_sparsity/importance_regularization_factor": 0.99988842434119, "compression/movement_sparsity/importance_threshold": -5.113369272663829e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9032723207063076, "compression/movement_sparsity/model_sparsity": 0.8722421376993628, "compression_loss": 107.05833435058594, "distillation_loss": 5.986939430236816, "epoch": 1.95, "learning_rate": 3.604637121120637e-05, "loss": 112.4561, "step": 2311, "task_loss": 2.530019998550415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998941943221527, "compression/movement_sparsity/importance_threshold": -4.848938449011214e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9033155815864905, "compression/movement_sparsity/model_sparsity": 0.8722839124352254, "compression_loss": 107.0583267211914, "distillation_loss": 7.1093220710754395, "epoch": 1.95, "learning_rate": 3.604033329308055e-05, "loss": 112.8177, "step": 2312, "task_loss": 3.978306770324707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998997618687827, "compression/movement_sparsity/importance_threshold": -4.5937849310704465e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9034258324404517, "compression/movement_sparsity/model_sparsity": 0.8723903758331817, "compression_loss": 107.05831909179688, "distillation_loss": 5.361395835876465, "epoch": 1.95, "learning_rate": 3.603429537495472e-05, "loss": 113.08, "step": 2313, "task_loss": 2.9708898067474365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999905130595979, "compression/movement_sparsity/importance_threshold": -4.34774305265416e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9035111498598862, "compression/movement_sparsity/model_sparsity": 0.8724727623367917, "compression_loss": 107.05828857421875, "distillation_loss": 4.2557172775268555, "epoch": 1.96, "learning_rate": 3.602825745682888e-05, "loss": 111.9793, "step": 2314, "task_loss": 2.212693214416504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999103041186402, "compression/movement_sparsity/importance_threshold": -4.11064714760101e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9035899447596241, "compression/movement_sparsity/model_sparsity": 0.872548850389322, "compression_loss": 107.05819702148438, "distillation_loss": 6.8656487464904785, "epoch": 1.96, "learning_rate": 3.6022219538703055e-05, "loss": 112.4848, "step": 2315, "task_loss": 2.784536123275757 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999152860516652, "compression/movement_sparsity/importance_threshold": -3.882331549732304e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.903617847311892, "compression/movement_sparsity/model_sparsity": 0.8725757944030812, "compression_loss": 107.05811309814453, "distillation_loss": 5.497105598449707, "epoch": 1.96, "learning_rate": 3.601618162057723e-05, "loss": 111.8836, "step": 2316, "task_loss": 3.5857832431793213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999200800099528, "compression/movement_sparsity/importance_threshold": -3.662630592886698e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9036456425466511, "compression/movement_sparsity/model_sparsity": 0.8726026347860183, "compression_loss": 107.05797576904297, "distillation_loss": 4.53317403793335, "epoch": 1.96, "learning_rate": 3.6010143702451396e-05, "loss": 112.2338, "step": 2317, "task_loss": 3.0411126613616943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999246896084018, "compression/movement_sparsity/importance_threshold": -3.4513786108681527e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9036964514249476, "compression/movement_sparsity/model_sparsity": 0.8726516982230388, "compression_loss": 107.0577621459961, "distillation_loss": 4.9958906173706055, "epoch": 1.96, "learning_rate": 3.6004105784325563e-05, "loss": 112.0232, "step": 2318, "task_loss": 2.2165703773498535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999291184619109, "compression/movement_sparsity/importance_threshold": -3.248409937515323e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9037823531285963, "compression/movement_sparsity/model_sparsity": 0.8727346489389027, "compression_loss": 107.05754852294922, "distillation_loss": 5.056425094604492, "epoch": 1.96, "learning_rate": 3.599806786619974e-05, "loss": 111.7814, "step": 2319, "task_loss": 1.912427306175232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999933370185379, "compression/movement_sparsity/importance_threshold": -3.0535589066668645e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9037750436138355, "compression/movement_sparsity/model_sparsity": 0.8727275905284606, "compression_loss": 107.05733489990234, "distillation_loss": 6.380853652954102, "epoch": 1.96, "learning_rate": 3.5992029948073905e-05, "loss": 112.6725, "step": 2320, "task_loss": 3.779951810836792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999374483937048, "compression/movement_sparsity/importance_threshold": -2.866659852126738e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9038868684579247, "compression/movement_sparsity/model_sparsity": 0.8728355738451419, "compression_loss": 107.05712127685547, "distillation_loss": 4.698399543762207, "epoch": 1.96, "learning_rate": 3.598599202994807e-05, "loss": 112.4414, "step": 2321, "task_loss": 3.0404486656188965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999413567017872, "compression/movement_sparsity/importance_threshold": -2.687547107742272e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9039168458153612, "compression/movement_sparsity/model_sparsity": 0.8728645213881293, "compression_loss": 107.05682373046875, "distillation_loss": 4.8979172706604, "epoch": 1.96, "learning_rate": 3.5979954111822246e-05, "loss": 112.3378, "step": 2322, "task_loss": 2.4429025650024414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999450987245249, "compression/movement_sparsity/importance_threshold": -2.516055007326101e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9038826115300787, "compression/movement_sparsity/model_sparsity": 0.8728314631558632, "compression_loss": 107.05657196044922, "distillation_loss": 6.107687473297119, "epoch": 1.96, "learning_rate": 3.597391619369642e-05, "loss": 112.5439, "step": 2323, "task_loss": 3.0134336948394775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999486780768168, "compression/movement_sparsity/importance_threshold": -2.3520178847082074e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9039343027967801, "compression/movement_sparsity/model_sparsity": 0.8728813786685325, "compression_loss": 107.05628967285156, "distillation_loss": 6.132275104522705, "epoch": 1.96, "learning_rate": 3.596787827557058e-05, "loss": 112.5165, "step": 2324, "task_loss": 3.3298778533935547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999520983735616, "compression/movement_sparsity/importance_threshold": -2.195270073718572e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9041104942977677, "compression/movement_sparsity/model_sparsity": 0.8730515174494327, "compression_loss": 107.05594635009766, "distillation_loss": 6.11865234375, "epoch": 1.96, "learning_rate": 3.5961840357444754e-05, "loss": 112.7928, "step": 2325, "task_loss": 3.11938214302063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999553632296582, "compression/movement_sparsity/importance_threshold": -2.0456459081785033e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9042371885788987, "compression/movement_sparsity/model_sparsity": 0.8731738593922497, "compression_loss": 107.0556411743164, "distillation_loss": 4.97293758392334, "epoch": 1.97, "learning_rate": 3.595580243931893e-05, "loss": 112.9323, "step": 2326, "task_loss": 3.308603286743164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999584762600052, "compression/movement_sparsity/importance_threshold": -1.9029797219179828e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9042539539585948, "compression/movement_sparsity/model_sparsity": 0.8731900488295768, "compression_loss": 107.05535125732422, "distillation_loss": 5.241941452026367, "epoch": 1.97, "learning_rate": 3.5949764521193095e-05, "loss": 112.024, "step": 2327, "task_loss": 3.163323402404785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999614410795017, "compression/movement_sparsity/importance_threshold": -1.7671058487669922e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9043399749039198, "compression/movement_sparsity/model_sparsity": 0.8732731146907986, "compression_loss": 107.05497741699219, "distillation_loss": 5.601337909698486, "epoch": 1.97, "learning_rate": 3.594372660306726e-05, "loss": 111.6594, "step": 2328, "task_loss": 2.609093427658081 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999642613030463, "compression/movement_sparsity/importance_threshold": -1.6378586225468394e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9043556432601934, "compression/movement_sparsity/model_sparsity": 0.8732882447908327, "compression_loss": 107.05460357666016, "distillation_loss": 5.2559661865234375, "epoch": 1.97, "learning_rate": 3.5937688684941436e-05, "loss": 112.1124, "step": 2329, "task_loss": 2.507340431213379 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999669405455378, "compression/movement_sparsity/importance_threshold": -1.5150723770788327e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9044548523349237, "compression/movement_sparsity/model_sparsity": 0.8733840457286433, "compression_loss": 107.05424499511719, "distillation_loss": 5.220808982849121, "epoch": 1.97, "learning_rate": 3.5931650766815604e-05, "loss": 112.9571, "step": 2330, "task_loss": 2.510662794113159 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999969482421875, "compression/movement_sparsity/importance_threshold": -1.3985814462103008e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.904491519150404, "compression/movement_sparsity/model_sparsity": 0.8734194529262115, "compression_loss": 107.05380249023438, "distillation_loss": 6.264935493469238, "epoch": 1.97, "learning_rate": 3.592561284868977e-05, "loss": 112.8593, "step": 2331, "task_loss": 3.4844398498535156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999718905469568, "compression/movement_sparsity/importance_threshold": -1.2882201637452045e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9045611562893975, "compression/movement_sparsity/model_sparsity": 0.8734866978152516, "compression_loss": 107.05341339111328, "distillation_loss": 4.688966751098633, "epoch": 1.97, "learning_rate": 3.5919574930563945e-05, "loss": 111.6497, "step": 2332, "task_loss": 2.1565444469451904 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999741685356818, "compression/movement_sparsity/importance_threshold": -1.1838228635308728e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9045298791976886, "compression/movement_sparsity/model_sparsity": 0.8734564951878625, "compression_loss": 107.05294036865234, "distillation_loss": 4.01136589050293, "epoch": 1.97, "learning_rate": 3.591353701243811e-05, "loss": 112.2197, "step": 2333, "task_loss": 2.3189046382904053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999976320002949, "compression/movement_sparsity/importance_threshold": -1.0852238793712665e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9045596657684429, "compression/movement_sparsity/model_sparsity": 0.8734852584982772, "compression_loss": 107.05242156982422, "distillation_loss": 6.6900634765625, "epoch": 1.97, "learning_rate": 3.590749909431228e-05, "loss": 112.626, "step": 2334, "task_loss": 3.5498740673065186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999783485636571, "compression/movement_sparsity/importance_threshold": -9.922575451137144e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9045976800148661, "compression/movement_sparsity/model_sparsity": 0.8735219668383902, "compression_loss": 107.05195617675781, "distillation_loss": 3.8514933586120605, "epoch": 1.97, "learning_rate": 3.590146117618645e-05, "loss": 112.0904, "step": 2335, "task_loss": 2.1213533878326416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999802578327048, "compression/movement_sparsity/importance_threshold": -9.04758194570851e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.904591920641898, "compression/movement_sparsity/model_sparsity": 0.8735164053176014, "compression_loss": 107.05140686035156, "distillation_loss": 5.618868827819824, "epoch": 1.97, "learning_rate": 3.589542325806062e-05, "loss": 112.5834, "step": 2336, "task_loss": 3.422750473022461 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999820514249912, "compression/movement_sparsity/importance_threshold": -8.22560161572658e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9046127283144225, "compression/movement_sparsity/model_sparsity": 0.873536498182563, "compression_loss": 107.0508804321289, "distillation_loss": 5.819667816162109, "epoch": 1.97, "learning_rate": 3.5889385339934794e-05, "loss": 112.3306, "step": 2337, "task_loss": 2.186988353729248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999837329554148, "compression/movement_sparsity/importance_threshold": -7.454977799491169e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9046388899382156, "compression/movement_sparsity/model_sparsity": 0.8735617610740961, "compression_loss": 107.05039978027344, "distillation_loss": 5.074985504150391, "epoch": 1.98, "learning_rate": 3.588334742180896e-05, "loss": 111.8055, "step": 2338, "task_loss": 4.376002788543701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999853060388745, "compression/movement_sparsity/importance_threshold": -6.734053835215359e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9047610053389745, "compression/movement_sparsity/model_sparsity": 0.873679681435168, "compression_loss": 107.04986572265625, "distillation_loss": 4.5952019691467285, "epoch": 1.98, "learning_rate": 3.5877309503683135e-05, "loss": 110.9649, "step": 2339, "task_loss": 1.5312023162841797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999986774290269, "compression/movement_sparsity/importance_threshold": -6.061173061285702e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9047800601588566, "compression/movement_sparsity/model_sparsity": 0.8736980816633677, "compression_loss": 107.04924774169922, "distillation_loss": 5.593252658843994, "epoch": 1.98, "learning_rate": 3.58712715855573e-05, "loss": 112.1904, "step": 2340, "task_loss": 2.073976755142212 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999881413244974, "compression/movement_sparsity/importance_threshold": -5.434678815828542e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9049270374491363, "compression/movement_sparsity/model_sparsity": 0.8738400098315713, "compression_loss": 107.04866790771484, "distillation_loss": 6.77750825881958, "epoch": 1.98, "learning_rate": 3.586523366743147e-05, "loss": 112.6618, "step": 2341, "task_loss": 2.9232850074768066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999894107564582, "compression/movement_sparsity/importance_threshold": -4.852914437143696e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9050289056132494, "compression/movement_sparsity/model_sparsity": 0.873938378510864, "compression_loss": 107.04816436767578, "distillation_loss": 6.931528091430664, "epoch": 1.98, "learning_rate": 3.5859195749305644e-05, "loss": 112.2451, "step": 2342, "task_loss": 3.822505235671997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999905862010502, "compression/movement_sparsity/importance_threshold": -4.314223263617717e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9050946793219289, "compression/movement_sparsity/model_sparsity": 0.8740018926903067, "compression_loss": 107.04761505126953, "distillation_loss": 5.275445938110352, "epoch": 1.98, "learning_rate": 3.585315783117981e-05, "loss": 112.4289, "step": 2343, "task_loss": 2.817155599594116 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999916712731723, "compression/movement_sparsity/importance_threshold": -3.816948633376949e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9051709105256249, "compression/movement_sparsity/model_sparsity": 0.8740755051176412, "compression_loss": 107.04705810546875, "distillation_loss": 5.428786277770996, "epoch": 1.98, "learning_rate": 3.584711991305398e-05, "loss": 111.6752, "step": 2344, "task_loss": 2.5568342208862305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999926695877234, "compression/movement_sparsity/importance_threshold": -3.359433884721208e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9052347048224768, "compression/movement_sparsity/model_sparsity": 0.874137107884142, "compression_loss": 107.04649353027344, "distillation_loss": 4.584506511688232, "epoch": 1.98, "learning_rate": 3.584108199492815e-05, "loss": 112.7676, "step": 2345, "task_loss": 2.2256548404693604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999935847596021, "compression/movement_sparsity/importance_threshold": -2.9400223558635752e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9053169458066613, "compression/movement_sparsity/model_sparsity": 0.8742165236375169, "compression_loss": 107.04594421386719, "distillation_loss": 4.739934921264648, "epoch": 1.98, "learning_rate": 3.583504407680232e-05, "loss": 111.8488, "step": 2346, "task_loss": 2.158292055130005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999944204037072, "compression/movement_sparsity/importance_threshold": -2.5570573852773393e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9053843054296363, "compression/movement_sparsity/model_sparsity": 0.8742815692502203, "compression_loss": 107.04535675048828, "distillation_loss": 5.500659942626953, "epoch": 1.98, "learning_rate": 3.582900615867649e-05, "loss": 111.567, "step": 2347, "task_loss": 2.9850924015045166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999951801349377, "compression/movement_sparsity/importance_threshold": -2.208882310915372e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9054655328595719, "compression/movement_sparsity/model_sparsity": 0.8743600062680527, "compression_loss": 107.04474639892578, "distillation_loss": 5.137619495391846, "epoch": 1.98, "learning_rate": 3.582296824055066e-05, "loss": 112.2382, "step": 2348, "task_loss": 3.3764069080352783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999958675681923, "compression/movement_sparsity/importance_threshold": -1.893840471337699e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9055719679798896, "compression/movement_sparsity/model_sparsity": 0.8744627850145548, "compression_loss": 107.04419708251953, "distillation_loss": 6.803169250488281, "epoch": 1.99, "learning_rate": 3.581693032242483e-05, "loss": 112.0182, "step": 2349, "task_loss": 2.442497491836548 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999964863183696, "compression/movement_sparsity/importance_threshold": -1.6102752046706637e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9055868970377696, "compression/movement_sparsity/model_sparsity": 0.8744772012133696, "compression_loss": 107.0436782836914, "distillation_loss": 5.284263610839844, "epoch": 1.99, "learning_rate": 3.5810892404299e-05, "loss": 112.3747, "step": 2350, "task_loss": 3.5034706592559814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999970400003686, "compression/movement_sparsity/importance_threshold": -1.3565298492140831e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9056099226054745, "compression/movement_sparsity/model_sparsity": 0.8744994357819889, "compression_loss": 107.04312133789062, "distillation_loss": 5.141788959503174, "epoch": 1.99, "learning_rate": 3.580485448617317e-05, "loss": 112.3067, "step": 2351, "task_loss": 2.4364359378814697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999975322290882, "compression/movement_sparsity/importance_threshold": -1.1309477431810377e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9056272126485465, "compression/movement_sparsity/model_sparsity": 0.8745161318588909, "compression_loss": 107.04251861572266, "distillation_loss": 5.209827423095703, "epoch": 1.99, "learning_rate": 3.5798816568047336e-05, "loss": 112.0751, "step": 2352, "task_loss": 3.2885141372680664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999979666194269, "compression/movement_sparsity/importance_threshold": -9.318722249580802e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9056692095669601, "compression/movement_sparsity/model_sparsity": 0.8745566860539593, "compression_loss": 107.0418930053711, "distillation_loss": 5.452573776245117, "epoch": 1.99, "learning_rate": 3.579277864992151e-05, "loss": 112.43, "step": 2353, "task_loss": 3.0674750804901123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999983467862836, "compression/movement_sparsity/importance_threshold": -7.576466326715547e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.905734601702275, "compression/movement_sparsity/model_sparsity": 0.8746198317682565, "compression_loss": 107.04124450683594, "distillation_loss": 4.946695327758789, "epoch": 1.99, "learning_rate": 3.578674073179568e-05, "loss": 112.464, "step": 2354, "task_loss": 2.1332285404205322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999986763445573, "compression/movement_sparsity/importance_threshold": -6.06614304621278e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.905787783489931, "compression/movement_sparsity/model_sparsity": 0.8746711865979002, "compression_loss": 107.0406265258789, "distillation_loss": 5.747748374938965, "epoch": 1.99, "learning_rate": 3.5780702813669844e-05, "loss": 112.001, "step": 2355, "task_loss": 2.985844135284424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999989589091466, "compression/movement_sparsity/importance_threshold": -4.7711857910706645e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.905786114106462, "compression/movement_sparsity/model_sparsity": 0.874669574562889, "compression_loss": 107.03995513916016, "distillation_loss": 4.946298599243164, "epoch": 1.99, "learning_rate": 3.577466489554402e-05, "loss": 111.634, "step": 2356, "task_loss": 2.27164626121521 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999991980949503, "compression/movement_sparsity/importance_threshold": -3.675027944287368e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9058541057103218, "compression/movement_sparsity/model_sparsity": 0.8747352304459894, "compression_loss": 107.03937530517578, "distillation_loss": 6.781465530395508, "epoch": 1.99, "learning_rate": 3.576862697741819e-05, "loss": 112.5599, "step": 2357, "task_loss": 2.941093683242798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999993975168672, "compression/movement_sparsity/importance_threshold": -2.7611028888610556e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9058855139678745, "compression/movement_sparsity/model_sparsity": 0.8747655597332722, "compression_loss": 107.03874969482422, "distillation_loss": 6.461554527282715, "epoch": 1.99, "learning_rate": 3.576258905929236e-05, "loss": 112.6721, "step": 2358, "task_loss": 3.177823066711426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999995607897962, "compression/movement_sparsity/importance_threshold": -2.01284400605517e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9058561208946522, "compression/movement_sparsity/model_sparsity": 0.8747371764025387, "compression_loss": 107.03809356689453, "distillation_loss": 5.240389347076416, "epoch": 1.99, "learning_rate": 3.5756551141166526e-05, "loss": 112.3491, "step": 2359, "task_loss": 2.9463047981262207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999999691528636, "compression/movement_sparsity/importance_threshold": -1.413684678867877e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9058396059224765, "compression/movement_sparsity/model_sparsity": 0.8747212287704632, "compression_loss": 107.0374984741211, "distillation_loss": 7.295163631439209, "epoch": 1.99, "learning_rate": 3.57505132230407e-05, "loss": 112.5864, "step": 2360, "task_loss": 4.100258827209473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999997933482855, "compression/movement_sparsity/importance_threshold": -9.470582902973423e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9058778228797495, "compression/movement_sparsity/model_sparsity": 0.8747581328576848, "compression_loss": 107.03684997558594, "distillation_loss": 3.4351401329040527, "epoch": 2.0, "learning_rate": 3.574447530491487e-05, "loss": 112.243, "step": 2361, "task_loss": 1.374301791191101 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999998698636433, "compression/movement_sparsity/importance_threshold": -5.963982242090937e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9059748140592996, "compression/movement_sparsity/model_sparsity": 0.8748517920918375, "compression_loss": 107.03622436523438, "distillation_loss": 7.033247947692871, "epoch": 2.0, "learning_rate": 3.5738437386789035e-05, "loss": 113.111, "step": 2362, "task_loss": 3.485999584197998 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999246896084, "compression/movement_sparsity/importance_threshold": -3.4513786099921173e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9060032770474463, "compression/movement_sparsity/model_sparsity": 0.8748792772887791, "compression_loss": 107.03559112548828, "distillation_loss": 4.228334426879883, "epoch": 2.0, "learning_rate": 3.573239946866321e-05, "loss": 112.48, "step": 2363, "task_loss": 1.517110824584961 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999614410795, "compression/movement_sparsity/importance_threshold": -1.7671058453322397e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9060744881765678, "compression/movement_sparsity/model_sparsity": 0.8749480420965441, "compression_loss": 107.03498840332031, "distillation_loss": 4.9608025550842285, "epoch": 2.0, "learning_rate": 3.5726361550537376e-05, "loss": 111.8071, "step": 2364, "task_loss": 3.0712032318115234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999837329554, "compression/movement_sparsity/importance_threshold": -7.454977780929628e-11, "compression/movement_sparsity/linear_layer_sparsity": 0.906107685059266, "compression/movement_sparsity/model_sparsity": 0.8749800985641961, "compression_loss": 107.03436279296875, "distillation_loss": 4.839308261871338, "epoch": 2.0, "learning_rate": 3.572032363241154e-05, "loss": 112.012, "step": 2365, "task_loss": 2.169099807739258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999951801349, "compression/movement_sparsity/importance_threshold": -2.208882295823278e-11, "compression/movement_sparsity/linear_layer_sparsity": 0.906149992006038, "compression/movement_sparsity/model_sparsity": 0.8750209521371951, "compression_loss": 107.03374481201172, "distillation_loss": 4.6223626136779785, "epoch": 2.0, "learning_rate": 3.571428571428572e-05, "loss": 110.9369, "step": 2366, "task_loss": 1.7837203741073608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 4.349185943603516, "epoch": 2.0, "learning_rate": 3.570824779615989e-05, "loss": 88.1394, "step": 2367, "task_loss": 3.242206335067749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 2.7205944061279297, "epoch": 2.0, "learning_rate": 3.570220987803405e-05, "loss": 2.4848, "step": 2368, "task_loss": 0.9353877902030945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 2.565199375152588, "epoch": 2.0, "learning_rate": 3.5696171959908225e-05, "loss": 2.5251, "step": 2369, "task_loss": 1.6911174058914185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 2.5849132537841797, "epoch": 2.0, "learning_rate": 3.56901340417824e-05, "loss": 2.3868, "step": 2370, "task_loss": 1.6777386665344238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 2.0580406188964844, "epoch": 2.0, "learning_rate": 3.568409612365656e-05, "loss": 1.6513, "step": 2371, "task_loss": 1.3811471462249756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.9813215732574463, "epoch": 2.01, "learning_rate": 3.5678058205530734e-05, "loss": 2.0614, "step": 2372, "task_loss": 1.8123201131820679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 2.1894760131835938, "epoch": 2.01, "learning_rate": 3.567202028740491e-05, "loss": 1.8342, "step": 2373, "task_loss": 1.10967218875885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 2.4108667373657227, "epoch": 2.01, "learning_rate": 3.5665982369279075e-05, "loss": 2.1589, "step": 2374, "task_loss": 1.0267047882080078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.51359224319458, "epoch": 2.01, "learning_rate": 3.565994445115324e-05, "loss": 1.6169, "step": 2375, "task_loss": 1.1415445804595947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.6596897840499878, "epoch": 2.01, "learning_rate": 3.5653906533027416e-05, "loss": 1.4382, "step": 2376, "task_loss": 0.9350376725196838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.4334663152694702, "epoch": 2.01, "learning_rate": 3.564786861490158e-05, "loss": 1.5606, "step": 2377, "task_loss": 1.589817762374878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.5624778270721436, "epoch": 2.01, "learning_rate": 3.564183069677575e-05, "loss": 1.6488, "step": 2378, "task_loss": 0.6257479786872864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.764270305633545, "epoch": 2.01, "learning_rate": 3.5635792778649924e-05, "loss": 1.5796, "step": 2379, "task_loss": 1.0914125442504883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 2.424124002456665, "epoch": 2.01, "learning_rate": 3.56297548605241e-05, "loss": 1.3184, "step": 2380, "task_loss": 0.9658804535865784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1204251050949097, "epoch": 2.01, "learning_rate": 3.562371694239826e-05, "loss": 1.3729, "step": 2381, "task_loss": 0.9968458414077759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1970728635787964, "epoch": 2.01, "learning_rate": 3.561767902427243e-05, "loss": 1.2539, "step": 2382, "task_loss": 0.6155930161476135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.6255215406417847, "epoch": 2.01, "learning_rate": 3.5611641106146607e-05, "loss": 1.1851, "step": 2383, "task_loss": 0.881079912185669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.796355128288269, "epoch": 2.02, "learning_rate": 3.560560318802077e-05, "loss": 1.4786, "step": 2384, "task_loss": 1.1558482646942139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.2527644634246826, "epoch": 2.02, "learning_rate": 3.559956526989494e-05, "loss": 1.2262, "step": 2385, "task_loss": 1.169134259223938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.140057921409607, "epoch": 2.02, "learning_rate": 3.5593527351769115e-05, "loss": 1.0266, "step": 2386, "task_loss": 0.8942002058029175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1648374795913696, "epoch": 2.02, "learning_rate": 3.558748943364328e-05, "loss": 1.1415, "step": 2387, "task_loss": 0.7604730129241943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.3427975177764893, "epoch": 2.02, "learning_rate": 3.558145151551745e-05, "loss": 1.142, "step": 2388, "task_loss": 0.2794044017791748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.2760496139526367, "epoch": 2.02, "learning_rate": 3.557541359739162e-05, "loss": 1.2425, "step": 2389, "task_loss": 1.810643196105957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1636450290679932, "epoch": 2.02, "learning_rate": 3.556937567926579e-05, "loss": 1.161, "step": 2390, "task_loss": 1.560205340385437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5843997001647949, "epoch": 2.02, "learning_rate": 3.556333776113996e-05, "loss": 0.7892, "step": 2391, "task_loss": 0.4454798400402069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0602747201919556, "epoch": 2.02, "learning_rate": 3.555729984301413e-05, "loss": 1.3698, "step": 2392, "task_loss": 0.7375110983848572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0878949165344238, "epoch": 2.02, "learning_rate": 3.55512619248883e-05, "loss": 1.0823, "step": 2393, "task_loss": 1.8166459798812866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.6793479919433594, "epoch": 2.02, "learning_rate": 3.5545224006762466e-05, "loss": 1.3468, "step": 2394, "task_loss": 0.942431628704071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 2.756063461303711, "epoch": 2.02, "learning_rate": 3.553918608863664e-05, "loss": 1.4078, "step": 2395, "task_loss": 1.4610316753387451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.4060542583465576, "epoch": 2.03, "learning_rate": 3.5533148170510814e-05, "loss": 0.9869, "step": 2396, "task_loss": 1.2283142805099487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0540175437927246, "epoch": 2.03, "learning_rate": 3.552711025238498e-05, "loss": 1.019, "step": 2397, "task_loss": 0.9596708416938782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7507641911506653, "epoch": 2.03, "learning_rate": 3.552107233425915e-05, "loss": 0.8784, "step": 2398, "task_loss": 1.0418905019760132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1250498294830322, "epoch": 2.03, "learning_rate": 3.551503441613332e-05, "loss": 0.9075, "step": 2399, "task_loss": 1.4816206693649292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1478986740112305, "epoch": 2.03, "learning_rate": 3.550899649800749e-05, "loss": 1.1714, "step": 2400, "task_loss": 0.869472861289978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.3664865493774414, "epoch": 2.03, "learning_rate": 3.5502958579881656e-05, "loss": 1.1591, "step": 2401, "task_loss": 1.0699517726898193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.4158215522766113, "epoch": 2.03, "learning_rate": 3.549692066175583e-05, "loss": 1.1683, "step": 2402, "task_loss": 1.3688056468963623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9395220279693604, "epoch": 2.03, "learning_rate": 3.549088274363e-05, "loss": 0.9789, "step": 2403, "task_loss": 0.6936195492744446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6571758985519409, "epoch": 2.03, "learning_rate": 3.5484844825504165e-05, "loss": 0.9297, "step": 2404, "task_loss": 1.4356839656829834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.081473708152771, "epoch": 2.03, "learning_rate": 3.547880690737834e-05, "loss": 1.0121, "step": 2405, "task_loss": 0.5474168658256531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6815711855888367, "epoch": 2.03, "learning_rate": 3.5472768989252506e-05, "loss": 0.7679, "step": 2406, "task_loss": 0.2519635856151581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6317338943481445, "epoch": 2.03, "learning_rate": 3.546673107112668e-05, "loss": 0.919, "step": 2407, "task_loss": 0.2557592988014221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7807583808898926, "epoch": 2.04, "learning_rate": 3.546069315300085e-05, "loss": 0.9908, "step": 2408, "task_loss": 1.3724546432495117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5837604999542236, "epoch": 2.04, "learning_rate": 3.5454655234875014e-05, "loss": 0.8557, "step": 2409, "task_loss": 0.7127106785774231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7362096309661865, "epoch": 2.04, "learning_rate": 3.544861731674919e-05, "loss": 1.0274, "step": 2410, "task_loss": 0.5370011329650879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5308130979537964, "epoch": 2.04, "learning_rate": 3.5442579398623355e-05, "loss": 0.8473, "step": 2411, "task_loss": 0.5682175755500793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1610183715820312, "epoch": 2.04, "learning_rate": 3.543654148049753e-05, "loss": 1.2029, "step": 2412, "task_loss": 1.5658528804779053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5979350209236145, "epoch": 2.04, "learning_rate": 3.5430503562371697e-05, "loss": 1.0191, "step": 2413, "task_loss": 0.5844428539276123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0601483583450317, "epoch": 2.04, "learning_rate": 3.5424465644245864e-05, "loss": 0.7759, "step": 2414, "task_loss": 0.7965730428695679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.019435167312622, "epoch": 2.04, "learning_rate": 3.541842772612004e-05, "loss": 1.0308, "step": 2415, "task_loss": 0.5301811695098877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.079788088798523, "epoch": 2.04, "learning_rate": 3.5412389807994205e-05, "loss": 1.2275, "step": 2416, "task_loss": 1.1540182828903198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8021439909934998, "epoch": 2.04, "learning_rate": 3.540635188986838e-05, "loss": 0.8782, "step": 2417, "task_loss": 0.3859696090221405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9913812875747681, "epoch": 2.04, "learning_rate": 3.5400313971742546e-05, "loss": 1.1456, "step": 2418, "task_loss": 0.6196288466453552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8446938395500183, "epoch": 2.04, "learning_rate": 3.539427605361671e-05, "loss": 1.2024, "step": 2419, "task_loss": 0.42238253355026245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8118801712989807, "epoch": 2.05, "learning_rate": 3.538823813549089e-05, "loss": 0.8211, "step": 2420, "task_loss": 0.6557321548461914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.134967565536499, "epoch": 2.05, "learning_rate": 3.5382200217365054e-05, "loss": 1.1971, "step": 2421, "task_loss": 1.2240535020828247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1765506267547607, "epoch": 2.05, "learning_rate": 3.537616229923922e-05, "loss": 0.9878, "step": 2422, "task_loss": 1.5448672771453857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7704134583473206, "epoch": 2.05, "learning_rate": 3.5370124381113395e-05, "loss": 0.8612, "step": 2423, "task_loss": 0.5969558358192444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9213511943817139, "epoch": 2.05, "learning_rate": 3.536408646298756e-05, "loss": 1.0592, "step": 2424, "task_loss": 0.38683590292930603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.238837480545044, "epoch": 2.05, "learning_rate": 3.535804854486173e-05, "loss": 1.0872, "step": 2425, "task_loss": 1.0126333236694336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.6734137535095215, "epoch": 2.05, "learning_rate": 3.5352010626735904e-05, "loss": 1.0164, "step": 2426, "task_loss": 1.6473370790481567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.668118417263031, "epoch": 2.05, "learning_rate": 3.534597270861008e-05, "loss": 0.6908, "step": 2427, "task_loss": 0.9471316337585449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6154012680053711, "epoch": 2.05, "learning_rate": 3.533993479048424e-05, "loss": 0.9916, "step": 2428, "task_loss": 0.5970145463943481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8368748426437378, "epoch": 2.05, "learning_rate": 3.533389687235841e-05, "loss": 0.7785, "step": 2429, "task_loss": 0.8316482901573181 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0642261505126953, "epoch": 2.05, "learning_rate": 3.5327858954232586e-05, "loss": 1.0563, "step": 2430, "task_loss": 1.7626588344573975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4558733105659485, "epoch": 2.05, "learning_rate": 3.532182103610675e-05, "loss": 0.6555, "step": 2431, "task_loss": 0.3980146050453186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0583751201629639, "epoch": 2.06, "learning_rate": 3.531578311798092e-05, "loss": 1.0287, "step": 2432, "task_loss": 1.2744895219802856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1265721321105957, "epoch": 2.06, "learning_rate": 3.5309745199855094e-05, "loss": 0.8923, "step": 2433, "task_loss": 1.1121697425842285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3979009985923767, "epoch": 2.06, "learning_rate": 3.530370728172926e-05, "loss": 0.8268, "step": 2434, "task_loss": 0.9548539519309998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1648811101913452, "epoch": 2.06, "learning_rate": 3.529766936360343e-05, "loss": 1.0118, "step": 2435, "task_loss": 1.6961398124694824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1112327575683594, "epoch": 2.06, "learning_rate": 3.52916314454776e-05, "loss": 0.8923, "step": 2436, "task_loss": 0.976320207118988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9347637891769409, "epoch": 2.06, "learning_rate": 3.528559352735178e-05, "loss": 0.9284, "step": 2437, "task_loss": 0.6215044260025024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6903398633003235, "epoch": 2.06, "learning_rate": 3.527955560922594e-05, "loss": 1.0, "step": 2438, "task_loss": 1.1031486988067627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4344136118888855, "epoch": 2.06, "learning_rate": 3.527351769110011e-05, "loss": 0.8953, "step": 2439, "task_loss": 0.3574349880218506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.140205979347229, "epoch": 2.06, "learning_rate": 3.5267479772974285e-05, "loss": 0.9154, "step": 2440, "task_loss": 0.7313425540924072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6056913137435913, "epoch": 2.06, "learning_rate": 3.5261441854848445e-05, "loss": 0.5436, "step": 2441, "task_loss": 0.33786725997924805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6707737445831299, "epoch": 2.06, "learning_rate": 3.525540393672262e-05, "loss": 0.8383, "step": 2442, "task_loss": 0.3613990843296051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.381842851638794, "epoch": 2.07, "learning_rate": 3.524936601859679e-05, "loss": 0.9134, "step": 2443, "task_loss": 1.5536361932754517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0843620300292969, "epoch": 2.07, "learning_rate": 3.5243328100470954e-05, "loss": 0.7936, "step": 2444, "task_loss": 1.426203966140747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0823248624801636, "epoch": 2.07, "learning_rate": 3.523729018234513e-05, "loss": 0.8735, "step": 2445, "task_loss": 1.160457730293274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.699629008769989, "epoch": 2.07, "learning_rate": 3.52312522642193e-05, "loss": 0.9332, "step": 2446, "task_loss": 0.9147905111312866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3874680995941162, "epoch": 2.07, "learning_rate": 3.522521434609347e-05, "loss": 0.6572, "step": 2447, "task_loss": 0.14993901550769806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.6780500411987305, "epoch": 2.07, "learning_rate": 3.5219176427967636e-05, "loss": 1.0311, "step": 2448, "task_loss": 2.4915900230407715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8990777730941772, "epoch": 2.07, "learning_rate": 3.521313850984181e-05, "loss": 0.721, "step": 2449, "task_loss": 0.45720523595809937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5212827324867249, "epoch": 2.07, "learning_rate": 3.520710059171598e-05, "loss": 0.7715, "step": 2450, "task_loss": 0.36768829822540283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40501323342323303, "epoch": 2.07, "learning_rate": 3.5201062673590144e-05, "loss": 0.6422, "step": 2451, "task_loss": 0.40096068382263184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0289720296859741, "epoch": 2.07, "learning_rate": 3.519502475546432e-05, "loss": 0.7915, "step": 2452, "task_loss": 1.7456183433532715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1048853397369385, "epoch": 2.07, "learning_rate": 3.518898683733849e-05, "loss": 0.866, "step": 2453, "task_loss": 1.430674433708191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1326031684875488, "epoch": 2.07, "learning_rate": 3.518294891921265e-05, "loss": 0.9134, "step": 2454, "task_loss": 0.6537672877311707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5145963430404663, "epoch": 2.08, "learning_rate": 3.517691100108683e-05, "loss": 0.8022, "step": 2455, "task_loss": 0.796389639377594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1991320848464966, "epoch": 2.08, "learning_rate": 3.5170873082961e-05, "loss": 0.9342, "step": 2456, "task_loss": 1.1727824211120605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.153314471244812, "epoch": 2.08, "learning_rate": 3.516483516483517e-05, "loss": 0.8515, "step": 2457, "task_loss": 1.2837263345718384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6867859363555908, "epoch": 2.08, "learning_rate": 3.5158797246709335e-05, "loss": 0.898, "step": 2458, "task_loss": 0.7602800130844116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8566561937332153, "epoch": 2.08, "learning_rate": 3.515275932858351e-05, "loss": 0.7669, "step": 2459, "task_loss": 1.108353853225708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7859554290771484, "epoch": 2.08, "learning_rate": 3.5146721410457676e-05, "loss": 0.8483, "step": 2460, "task_loss": 1.122902274131775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9361256957054138, "epoch": 2.08, "learning_rate": 3.514068349233184e-05, "loss": 1.1369, "step": 2461, "task_loss": 1.1887872219085693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8102313876152039, "epoch": 2.08, "learning_rate": 3.513464557420602e-05, "loss": 0.9012, "step": 2462, "task_loss": 0.10270078480243683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.580176830291748, "epoch": 2.08, "learning_rate": 3.5128607656080184e-05, "loss": 0.88, "step": 2463, "task_loss": 0.24691437184810638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6343140006065369, "epoch": 2.08, "learning_rate": 3.512256973795435e-05, "loss": 0.5748, "step": 2464, "task_loss": 0.3584992587566376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7808558940887451, "epoch": 2.08, "learning_rate": 3.5116531819828526e-05, "loss": 0.8155, "step": 2465, "task_loss": 0.6913083791732788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9794888496398926, "epoch": 2.08, "learning_rate": 3.511049390170269e-05, "loss": 0.7098, "step": 2466, "task_loss": 1.1354312896728516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5807239413261414, "epoch": 2.09, "learning_rate": 3.510445598357686e-05, "loss": 0.5799, "step": 2467, "task_loss": 0.5701643228530884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8118342757225037, "epoch": 2.09, "learning_rate": 3.5098418065451034e-05, "loss": 0.7221, "step": 2468, "task_loss": 0.7796528339385986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9054543972015381, "epoch": 2.09, "learning_rate": 3.509238014732521e-05, "loss": 0.6543, "step": 2469, "task_loss": 1.605278491973877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.102556824684143, "epoch": 2.09, "learning_rate": 3.5086342229199375e-05, "loss": 0.7278, "step": 2470, "task_loss": 1.1261076927185059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7592182159423828, "epoch": 2.09, "learning_rate": 3.508030431107354e-05, "loss": 0.6934, "step": 2471, "task_loss": 0.7925288081169128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5732554197311401, "epoch": 2.09, "learning_rate": 3.5074266392947716e-05, "loss": 0.849, "step": 2472, "task_loss": 1.1981027126312256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7470301985740662, "epoch": 2.09, "learning_rate": 3.506822847482188e-05, "loss": 0.8349, "step": 2473, "task_loss": 0.7301526069641113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0529500246047974, "epoch": 2.09, "learning_rate": 3.506219055669605e-05, "loss": 1.0253, "step": 2474, "task_loss": 1.3047890663146973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.799798846244812, "epoch": 2.09, "learning_rate": 3.5056152638570225e-05, "loss": 1.0667, "step": 2475, "task_loss": 0.7873750329017639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5219271183013916, "epoch": 2.09, "learning_rate": 3.505011472044439e-05, "loss": 0.6071, "step": 2476, "task_loss": 0.1861841231584549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.4437048435211182, "epoch": 2.09, "learning_rate": 3.504407680231856e-05, "loss": 0.9243, "step": 2477, "task_loss": 1.6513155698776245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6540335416793823, "epoch": 2.09, "learning_rate": 3.503803888419273e-05, "loss": 0.7815, "step": 2478, "task_loss": 0.9857924580574036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8486277461051941, "epoch": 2.1, "learning_rate": 3.50320009660669e-05, "loss": 0.7601, "step": 2479, "task_loss": 0.6257694363594055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9827658534049988, "epoch": 2.1, "learning_rate": 3.5025963047941074e-05, "loss": 0.8603, "step": 2480, "task_loss": 0.40496402978897095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6278359293937683, "epoch": 2.1, "learning_rate": 3.501992512981524e-05, "loss": 0.7432, "step": 2481, "task_loss": 0.7563680410385132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4470987319946289, "epoch": 2.1, "learning_rate": 3.501388721168941e-05, "loss": 0.7478, "step": 2482, "task_loss": 0.589605987071991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7129452228546143, "epoch": 2.1, "learning_rate": 3.500784929356358e-05, "loss": 0.6693, "step": 2483, "task_loss": 0.6439433097839355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.62811279296875, "epoch": 2.1, "learning_rate": 3.500181137543775e-05, "loss": 0.6495, "step": 2484, "task_loss": 1.0140297412872314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5585733652114868, "epoch": 2.1, "learning_rate": 3.499577345731192e-05, "loss": 0.6621, "step": 2485, "task_loss": 0.44986873865127563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4508213400840759, "epoch": 2.1, "learning_rate": 3.498973553918609e-05, "loss": 0.6878, "step": 2486, "task_loss": 1.3385705947875977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0636061429977417, "epoch": 2.1, "learning_rate": 3.498369762106026e-05, "loss": 0.9505, "step": 2487, "task_loss": 1.3804337978363037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 2.260240316390991, "epoch": 2.1, "learning_rate": 3.497765970293443e-05, "loss": 1.3619, "step": 2488, "task_loss": 2.549764394760132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1724748611450195, "epoch": 2.1, "learning_rate": 3.49716217848086e-05, "loss": 0.8501, "step": 2489, "task_loss": 1.8429583311080933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7266545295715332, "epoch": 2.1, "learning_rate": 3.496558386668277e-05, "loss": 0.6057, "step": 2490, "task_loss": 0.5760114192962646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6508098840713501, "epoch": 2.11, "learning_rate": 3.495954594855694e-05, "loss": 0.7531, "step": 2491, "task_loss": 1.1736574172973633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9003543853759766, "epoch": 2.11, "learning_rate": 3.495350803043111e-05, "loss": 0.837, "step": 2492, "task_loss": 1.3907275199890137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8071339130401611, "epoch": 2.11, "learning_rate": 3.494747011230528e-05, "loss": 0.9225, "step": 2493, "task_loss": 1.1090761423110962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0824297666549683, "epoch": 2.11, "learning_rate": 3.494143219417945e-05, "loss": 0.8358, "step": 2494, "task_loss": 0.4579342007637024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5819069147109985, "epoch": 2.11, "learning_rate": 3.4935394276053616e-05, "loss": 0.8469, "step": 2495, "task_loss": 0.5355756282806396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6108310222625732, "epoch": 2.11, "learning_rate": 3.492935635792779e-05, "loss": 0.8231, "step": 2496, "task_loss": 0.5172597169876099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8570882081985474, "epoch": 2.11, "learning_rate": 3.492331843980196e-05, "loss": 0.6343, "step": 2497, "task_loss": 0.9644769430160522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7916079759597778, "epoch": 2.11, "learning_rate": 3.4917280521676124e-05, "loss": 0.7277, "step": 2498, "task_loss": 1.6640433073043823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.087581992149353, "epoch": 2.11, "learning_rate": 3.49112426035503e-05, "loss": 0.7249, "step": 2499, "task_loss": 0.9530234336853027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6886265277862549, "epoch": 2.11, "learning_rate": 3.490520468542447e-05, "loss": 0.6337, "step": 2500, "task_loss": 0.30300816893577576 }, { "epoch": 2.11, "eval_accuracy": 0.8826138613861386, "eval_loss": 0.4864577353000641, "eval_runtime": 319.2335, "eval_samples_per_second": 79.096, "eval_steps_per_second": 0.62, "step": 2500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7372449636459351, "epoch": 2.11, "learning_rate": 3.489916676729863e-05, "loss": 0.6497, "step": 2501, "task_loss": 1.0769312381744385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6813398599624634, "epoch": 2.11, "learning_rate": 3.4893128849172806e-05, "loss": 0.8553, "step": 2502, "task_loss": 0.47747910022735596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.579484760761261, "epoch": 2.12, "learning_rate": 3.488709093104698e-05, "loss": 0.9153, "step": 2503, "task_loss": 0.4548407793045044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6433004140853882, "epoch": 2.12, "learning_rate": 3.488105301292115e-05, "loss": 0.7384, "step": 2504, "task_loss": 0.3672809898853302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8981249332427979, "epoch": 2.12, "learning_rate": 3.4875015094795315e-05, "loss": 0.7991, "step": 2505, "task_loss": 1.2803148031234741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.001046061515808, "epoch": 2.12, "learning_rate": 3.486897717666949e-05, "loss": 0.9026, "step": 2506, "task_loss": 1.2839548587799072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9328277111053467, "epoch": 2.12, "learning_rate": 3.4862939258543656e-05, "loss": 0.7995, "step": 2507, "task_loss": 0.4065932631492615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5227213501930237, "epoch": 2.12, "learning_rate": 3.485690134041782e-05, "loss": 0.6682, "step": 2508, "task_loss": 0.9641379714012146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.279910922050476, "epoch": 2.12, "learning_rate": 3.4850863422292e-05, "loss": 0.8093, "step": 2509, "task_loss": 1.5428448915481567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7935410737991333, "epoch": 2.12, "learning_rate": 3.484482550416617e-05, "loss": 0.5643, "step": 2510, "task_loss": 0.34139499068260193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1751829385757446, "epoch": 2.12, "learning_rate": 3.483878758604033e-05, "loss": 1.0316, "step": 2511, "task_loss": 1.018160104751587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5592427253723145, "epoch": 2.12, "learning_rate": 3.4832749667914505e-05, "loss": 0.9108, "step": 2512, "task_loss": 0.8563842177391052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6486711502075195, "epoch": 2.12, "learning_rate": 3.482671174978868e-05, "loss": 0.6907, "step": 2513, "task_loss": 0.2726428508758545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9659405946731567, "epoch": 2.13, "learning_rate": 3.482067383166284e-05, "loss": 0.9215, "step": 2514, "task_loss": 0.8788712024688721 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7546584010124207, "epoch": 2.13, "learning_rate": 3.4814635913537013e-05, "loss": 0.8168, "step": 2515, "task_loss": 1.028497338294983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1770217418670654, "epoch": 2.13, "learning_rate": 3.480859799541119e-05, "loss": 0.7334, "step": 2516, "task_loss": 1.9058687686920166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7031017541885376, "epoch": 2.13, "learning_rate": 3.480256007728535e-05, "loss": 0.5497, "step": 2517, "task_loss": 0.6214005947113037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1955652236938477, "epoch": 2.13, "learning_rate": 3.479652215915952e-05, "loss": 0.7417, "step": 2518, "task_loss": 0.7200250029563904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9072008728981018, "epoch": 2.13, "learning_rate": 3.4790484241033696e-05, "loss": 0.7391, "step": 2519, "task_loss": 0.7941955327987671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0419659614562988, "epoch": 2.13, "learning_rate": 3.478444632290786e-05, "loss": 0.8328, "step": 2520, "task_loss": 1.0112481117248535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6940279006958008, "epoch": 2.13, "learning_rate": 3.477840840478203e-05, "loss": 0.9223, "step": 2521, "task_loss": 1.069301962852478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4730271100997925, "epoch": 2.13, "learning_rate": 3.4772370486656204e-05, "loss": 0.6896, "step": 2522, "task_loss": 1.212876319885254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8402670621871948, "epoch": 2.13, "learning_rate": 3.476633256853037e-05, "loss": 0.7959, "step": 2523, "task_loss": 0.5971810221672058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6537269353866577, "epoch": 2.13, "learning_rate": 3.476029465040454e-05, "loss": 0.7189, "step": 2524, "task_loss": 0.665871262550354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.2801713943481445, "epoch": 2.13, "learning_rate": 3.475425673227871e-05, "loss": 0.9194, "step": 2525, "task_loss": 1.2647610902786255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.608608603477478, "epoch": 2.14, "learning_rate": 3.4748218814152886e-05, "loss": 0.6748, "step": 2526, "task_loss": 0.5705200433731079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5313808917999268, "epoch": 2.14, "learning_rate": 3.474218089602705e-05, "loss": 0.7934, "step": 2527, "task_loss": 0.5272083878517151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6207451820373535, "epoch": 2.14, "learning_rate": 3.473614297790122e-05, "loss": 0.7166, "step": 2528, "task_loss": 0.7474178075790405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8747502565383911, "epoch": 2.14, "learning_rate": 3.4730105059775395e-05, "loss": 0.7302, "step": 2529, "task_loss": 0.7328029870986938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5046684741973877, "epoch": 2.14, "learning_rate": 3.472406714164956e-05, "loss": 0.8014, "step": 2530, "task_loss": 1.367836356163025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7532894611358643, "epoch": 2.14, "learning_rate": 3.471802922352373e-05, "loss": 0.7814, "step": 2531, "task_loss": 1.7108696699142456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0363688468933105, "epoch": 2.14, "learning_rate": 3.47119913053979e-05, "loss": 0.7145, "step": 2532, "task_loss": 0.7569209337234497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5595017671585083, "epoch": 2.14, "learning_rate": 3.470595338727207e-05, "loss": 0.5795, "step": 2533, "task_loss": 1.5426212549209595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3059442937374115, "epoch": 2.14, "learning_rate": 3.469991546914624e-05, "loss": 0.5479, "step": 2534, "task_loss": 0.6850960850715637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6967679262161255, "epoch": 2.14, "learning_rate": 3.469387755102041e-05, "loss": 0.6169, "step": 2535, "task_loss": 0.5915271639823914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.2692327499389648, "epoch": 2.14, "learning_rate": 3.468783963289458e-05, "loss": 0.811, "step": 2536, "task_loss": 1.0151129961013794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6008278131484985, "epoch": 2.14, "learning_rate": 3.4681801714768746e-05, "loss": 0.8142, "step": 2537, "task_loss": 0.7451302409172058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.509485125541687, "epoch": 2.15, "learning_rate": 3.467576379664292e-05, "loss": 0.8295, "step": 2538, "task_loss": 0.8438625931739807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7710742354393005, "epoch": 2.15, "learning_rate": 3.466972587851709e-05, "loss": 0.9263, "step": 2539, "task_loss": 0.5225381851196289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.59308922290802, "epoch": 2.15, "learning_rate": 3.466368796039126e-05, "loss": 0.6599, "step": 2540, "task_loss": 0.3796142339706421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7615129947662354, "epoch": 2.15, "learning_rate": 3.465765004226543e-05, "loss": 0.8736, "step": 2541, "task_loss": 1.3848168849945068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6708366870880127, "epoch": 2.15, "learning_rate": 3.4651612124139595e-05, "loss": 0.7585, "step": 2542, "task_loss": 1.6661099195480347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0438733100891113, "epoch": 2.15, "learning_rate": 3.464557420601377e-05, "loss": 0.9435, "step": 2543, "task_loss": 1.2974282503128052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5425032377243042, "epoch": 2.15, "learning_rate": 3.4639536287887936e-05, "loss": 0.6803, "step": 2544, "task_loss": 0.19314466416835785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7122754454612732, "epoch": 2.15, "learning_rate": 3.463349836976211e-05, "loss": 0.7305, "step": 2545, "task_loss": 1.1431130170822144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42436718940734863, "epoch": 2.15, "learning_rate": 3.462746045163628e-05, "loss": 0.6689, "step": 2546, "task_loss": 0.7460067272186279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.457874059677124, "epoch": 2.15, "learning_rate": 3.4621422533510445e-05, "loss": 0.5964, "step": 2547, "task_loss": 0.6062554717063904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49053966999053955, "epoch": 2.15, "learning_rate": 3.461538461538462e-05, "loss": 0.631, "step": 2548, "task_loss": 0.20629334449768066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7455025911331177, "epoch": 2.15, "learning_rate": 3.4609346697258786e-05, "loss": 0.6491, "step": 2549, "task_loss": 1.0195609331130981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9819023609161377, "epoch": 2.16, "learning_rate": 3.460330877913296e-05, "loss": 0.8959, "step": 2550, "task_loss": 0.7578344941139221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.541140079498291, "epoch": 2.16, "learning_rate": 3.459727086100713e-05, "loss": 0.6446, "step": 2551, "task_loss": 0.4573591649532318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5919749736785889, "epoch": 2.16, "learning_rate": 3.4591232942881294e-05, "loss": 0.7575, "step": 2552, "task_loss": 1.2655636072158813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7198960781097412, "epoch": 2.16, "learning_rate": 3.458519502475547e-05, "loss": 0.686, "step": 2553, "task_loss": 0.9463868141174316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6336629390716553, "epoch": 2.16, "learning_rate": 3.4579157106629635e-05, "loss": 0.5249, "step": 2554, "task_loss": 0.5117077827453613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.905958890914917, "epoch": 2.16, "learning_rate": 3.45731191885038e-05, "loss": 0.7005, "step": 2555, "task_loss": 0.9419921636581421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.045901894569397, "epoch": 2.16, "learning_rate": 3.4567081270377976e-05, "loss": 0.8129, "step": 2556, "task_loss": 1.8717563152313232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8632268905639648, "epoch": 2.16, "learning_rate": 3.4561043352252144e-05, "loss": 0.988, "step": 2557, "task_loss": 1.3686076402664185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9747951030731201, "epoch": 2.16, "learning_rate": 3.455500543412631e-05, "loss": 0.6753, "step": 2558, "task_loss": 0.7295071482658386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0820378065109253, "epoch": 2.16, "learning_rate": 3.4548967516000485e-05, "loss": 0.831, "step": 2559, "task_loss": 2.3239612579345703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7568842172622681, "epoch": 2.16, "learning_rate": 3.454292959787466e-05, "loss": 0.7188, "step": 2560, "task_loss": 0.5114870667457581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6254346370697021, "epoch": 2.16, "learning_rate": 3.4536891679748826e-05, "loss": 0.4971, "step": 2561, "task_loss": 0.28695619106292725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6483465433120728, "epoch": 2.17, "learning_rate": 3.453085376162299e-05, "loss": 0.7784, "step": 2562, "task_loss": 0.7298913598060608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8677448034286499, "epoch": 2.17, "learning_rate": 3.452481584349717e-05, "loss": 0.8262, "step": 2563, "task_loss": 0.47839632630348206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7098305225372314, "epoch": 2.17, "learning_rate": 3.4518777925371334e-05, "loss": 0.6015, "step": 2564, "task_loss": 1.1776984930038452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6359189748764038, "epoch": 2.17, "learning_rate": 3.45127400072455e-05, "loss": 0.6633, "step": 2565, "task_loss": 0.24316813051700592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.3071026802062988, "epoch": 2.17, "learning_rate": 3.4506702089119675e-05, "loss": 0.9435, "step": 2566, "task_loss": 1.283416748046875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43450814485549927, "epoch": 2.17, "learning_rate": 3.450066417099384e-05, "loss": 0.6538, "step": 2567, "task_loss": 0.6240083575248718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5230266451835632, "epoch": 2.17, "learning_rate": 3.449462625286801e-05, "loss": 0.5523, "step": 2568, "task_loss": 0.2686050236225128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6266749501228333, "epoch": 2.17, "learning_rate": 3.4488588334742184e-05, "loss": 0.7032, "step": 2569, "task_loss": 0.6975458264350891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5010442733764648, "epoch": 2.17, "learning_rate": 3.448255041661636e-05, "loss": 0.7527, "step": 2570, "task_loss": 0.18750514090061188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4830760955810547, "epoch": 2.17, "learning_rate": 3.447651249849052e-05, "loss": 0.6227, "step": 2571, "task_loss": 0.5416849255561829 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6181453466415405, "epoch": 2.17, "learning_rate": 3.447047458036469e-05, "loss": 0.585, "step": 2572, "task_loss": 0.6560701727867126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4040301442146301, "epoch": 2.17, "learning_rate": 3.4464436662238866e-05, "loss": 0.6735, "step": 2573, "task_loss": 0.46786898374557495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7817615270614624, "epoch": 2.18, "learning_rate": 3.4458398744113026e-05, "loss": 0.7999, "step": 2574, "task_loss": 1.0563619136810303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.2017359733581543, "epoch": 2.18, "learning_rate": 3.44523608259872e-05, "loss": 0.7796, "step": 2575, "task_loss": 0.8636245727539062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7227388620376587, "epoch": 2.18, "learning_rate": 3.4446322907861374e-05, "loss": 0.8303, "step": 2576, "task_loss": 1.4670159816741943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.453777939081192, "epoch": 2.18, "learning_rate": 3.444028498973554e-05, "loss": 0.705, "step": 2577, "task_loss": 0.5616059899330139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5245682001113892, "epoch": 2.18, "learning_rate": 3.443424707160971e-05, "loss": 0.5438, "step": 2578, "task_loss": 1.3908417224884033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7364324331283569, "epoch": 2.18, "learning_rate": 3.442820915348388e-05, "loss": 0.7401, "step": 2579, "task_loss": 0.7921330332756042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8037649393081665, "epoch": 2.18, "learning_rate": 3.442217123535805e-05, "loss": 0.8788, "step": 2580, "task_loss": 0.24544285237789154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8214502930641174, "epoch": 2.18, "learning_rate": 3.441613331723222e-05, "loss": 0.6763, "step": 2581, "task_loss": 0.4376085698604584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7653232216835022, "epoch": 2.18, "learning_rate": 3.441009539910639e-05, "loss": 0.7187, "step": 2582, "task_loss": 0.5402956008911133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5785995721817017, "epoch": 2.18, "learning_rate": 3.4404057480980565e-05, "loss": 0.5007, "step": 2583, "task_loss": 0.9567428827285767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.608117938041687, "epoch": 2.18, "learning_rate": 3.4398019562854725e-05, "loss": 0.6376, "step": 2584, "task_loss": 0.9879399538040161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.208664059638977, "epoch": 2.19, "learning_rate": 3.43919816447289e-05, "loss": 0.794, "step": 2585, "task_loss": 0.566828727722168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.522575318813324, "epoch": 2.19, "learning_rate": 3.438594372660307e-05, "loss": 0.5832, "step": 2586, "task_loss": 0.7799788117408752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5873284339904785, "epoch": 2.19, "learning_rate": 3.4379905808477234e-05, "loss": 0.6663, "step": 2587, "task_loss": 1.0264599323272705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7285182476043701, "epoch": 2.19, "learning_rate": 3.437386789035141e-05, "loss": 0.6293, "step": 2588, "task_loss": 0.8044673204421997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8728448748588562, "epoch": 2.19, "learning_rate": 3.436782997222558e-05, "loss": 0.8622, "step": 2589, "task_loss": 1.1106007099151611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6990394592285156, "epoch": 2.19, "learning_rate": 3.436179205409975e-05, "loss": 0.8194, "step": 2590, "task_loss": 1.1955654621124268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7859220504760742, "epoch": 2.19, "learning_rate": 3.4355754135973916e-05, "loss": 0.6361, "step": 2591, "task_loss": 0.9840671420097351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5821216106414795, "epoch": 2.19, "learning_rate": 3.434971621784809e-05, "loss": 0.6195, "step": 2592, "task_loss": 1.1663172245025635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5208157896995544, "epoch": 2.19, "learning_rate": 3.434367829972226e-05, "loss": 0.4036, "step": 2593, "task_loss": 0.3721601068973541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5542153120040894, "epoch": 2.19, "learning_rate": 3.4337640381596424e-05, "loss": 0.7979, "step": 2594, "task_loss": 1.0534355640411377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6702423691749573, "epoch": 2.19, "learning_rate": 3.43316024634706e-05, "loss": 0.7304, "step": 2595, "task_loss": 1.0202596187591553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.700971782207489, "epoch": 2.19, "learning_rate": 3.4325564545344765e-05, "loss": 0.6484, "step": 2596, "task_loss": 0.47938477993011475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7527737021446228, "epoch": 2.2, "learning_rate": 3.431952662721893e-05, "loss": 0.7968, "step": 2597, "task_loss": 0.7350645661354065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8180854916572571, "epoch": 2.2, "learning_rate": 3.4313488709093107e-05, "loss": 0.721, "step": 2598, "task_loss": 0.4951660633087158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8789198398590088, "epoch": 2.2, "learning_rate": 3.430745079096728e-05, "loss": 0.6668, "step": 2599, "task_loss": 1.589779257774353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6745799779891968, "epoch": 2.2, "learning_rate": 3.430141287284145e-05, "loss": 0.5073, "step": 2600, "task_loss": 1.1945455074310303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.501326322555542, "epoch": 2.2, "learning_rate": 3.4295374954715615e-05, "loss": 0.6682, "step": 2601, "task_loss": 0.7123518586158752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9340012073516846, "epoch": 2.2, "learning_rate": 3.428933703658979e-05, "loss": 0.6908, "step": 2602, "task_loss": 0.6953555345535278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.645127534866333, "epoch": 2.2, "learning_rate": 3.4283299118463956e-05, "loss": 0.8353, "step": 2603, "task_loss": 0.8894546627998352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5265524983406067, "epoch": 2.2, "learning_rate": 3.427726120033812e-05, "loss": 0.6254, "step": 2604, "task_loss": 0.95720374584198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9997602701187134, "epoch": 2.2, "learning_rate": 3.42712232822123e-05, "loss": 0.7205, "step": 2605, "task_loss": 0.678781270980835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9150526523590088, "epoch": 2.2, "learning_rate": 3.4265185364086464e-05, "loss": 0.6961, "step": 2606, "task_loss": 1.1692907810211182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7650465965270996, "epoch": 2.2, "learning_rate": 3.425914744596063e-05, "loss": 0.6901, "step": 2607, "task_loss": 1.0067163705825806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6061875820159912, "epoch": 2.2, "learning_rate": 3.4253109527834805e-05, "loss": 0.6734, "step": 2608, "task_loss": 1.2997677326202393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6984352469444275, "epoch": 2.21, "learning_rate": 3.424707160970897e-05, "loss": 0.8346, "step": 2609, "task_loss": 1.5402400493621826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.683719277381897, "epoch": 2.21, "learning_rate": 3.4241033691583147e-05, "loss": 0.5358, "step": 2610, "task_loss": 0.13494500517845154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7323950529098511, "epoch": 2.21, "learning_rate": 3.4234995773457314e-05, "loss": 0.7207, "step": 2611, "task_loss": 2.20849347114563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0876598358154297, "epoch": 2.21, "learning_rate": 3.422895785533148e-05, "loss": 0.7456, "step": 2612, "task_loss": 1.1543186902999878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5852044820785522, "epoch": 2.21, "learning_rate": 3.4222919937205655e-05, "loss": 0.6764, "step": 2613, "task_loss": 0.12898634374141693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3412366509437561, "epoch": 2.21, "learning_rate": 3.421688201907982e-05, "loss": 0.6635, "step": 2614, "task_loss": 0.09056396037340164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6522666215896606, "epoch": 2.21, "learning_rate": 3.421084410095399e-05, "loss": 0.507, "step": 2615, "task_loss": 1.1570056676864624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7936397790908813, "epoch": 2.21, "learning_rate": 3.420480618282816e-05, "loss": 0.6562, "step": 2616, "task_loss": 1.46969473361969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9762423038482666, "epoch": 2.21, "learning_rate": 3.419876826470233e-05, "loss": 0.8368, "step": 2617, "task_loss": 2.0029287338256836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.637478768825531, "epoch": 2.21, "learning_rate": 3.4192730346576504e-05, "loss": 0.6155, "step": 2618, "task_loss": 0.30061423778533936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.751225471496582, "epoch": 2.21, "learning_rate": 3.418669242845067e-05, "loss": 0.6564, "step": 2619, "task_loss": 0.9528793692588806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.87307208776474, "epoch": 2.21, "learning_rate": 3.4180654510324846e-05, "loss": 0.7036, "step": 2620, "task_loss": 1.2467530965805054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6095675826072693, "epoch": 2.22, "learning_rate": 3.417461659219901e-05, "loss": 0.5992, "step": 2621, "task_loss": 0.5464479923248291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6189175248146057, "epoch": 2.22, "learning_rate": 3.416857867407318e-05, "loss": 0.5091, "step": 2622, "task_loss": 0.8800733089447021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2679450213909149, "epoch": 2.22, "learning_rate": 3.4162540755947354e-05, "loss": 0.7927, "step": 2623, "task_loss": 0.17810727655887604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8602766990661621, "epoch": 2.22, "learning_rate": 3.415650283782152e-05, "loss": 0.8124, "step": 2624, "task_loss": 0.7364860773086548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8210525512695312, "epoch": 2.22, "learning_rate": 3.415046491969569e-05, "loss": 0.7117, "step": 2625, "task_loss": 0.38724952936172485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5421228408813477, "epoch": 2.22, "learning_rate": 3.414442700156986e-05, "loss": 0.8688, "step": 2626, "task_loss": 1.0124809741973877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8692517280578613, "epoch": 2.22, "learning_rate": 3.413838908344403e-05, "loss": 0.6922, "step": 2627, "task_loss": 0.608087420463562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.811151921749115, "epoch": 2.22, "learning_rate": 3.4132351165318197e-05, "loss": 0.7664, "step": 2628, "task_loss": 1.2837374210357666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8792744278907776, "epoch": 2.22, "learning_rate": 3.412631324719237e-05, "loss": 0.6986, "step": 2629, "task_loss": 1.0684648752212524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0081779956817627, "epoch": 2.22, "learning_rate": 3.4120275329066544e-05, "loss": 0.6443, "step": 2630, "task_loss": 0.6529222726821899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0155611038208008, "epoch": 2.22, "learning_rate": 3.4114237410940705e-05, "loss": 0.6661, "step": 2631, "task_loss": 0.5920569896697998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7768115401268005, "epoch": 2.22, "learning_rate": 3.410819949281488e-05, "loss": 0.6739, "step": 2632, "task_loss": 0.6007518172264099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9778826832771301, "epoch": 2.23, "learning_rate": 3.410216157468905e-05, "loss": 0.6225, "step": 2633, "task_loss": 0.572760820388794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.812739372253418, "epoch": 2.23, "learning_rate": 3.409612365656322e-05, "loss": 0.7358, "step": 2634, "task_loss": 0.7990007996559143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44216251373291016, "epoch": 2.23, "learning_rate": 3.409008573843739e-05, "loss": 0.7406, "step": 2635, "task_loss": 0.48759278655052185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3929172158241272, "epoch": 2.23, "learning_rate": 3.408404782031156e-05, "loss": 0.5623, "step": 2636, "task_loss": 0.2060825526714325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5328934192657471, "epoch": 2.23, "learning_rate": 3.407800990218573e-05, "loss": 0.6912, "step": 2637, "task_loss": 0.23770923912525177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3774576187133789, "epoch": 2.23, "learning_rate": 3.4071971984059895e-05, "loss": 0.537, "step": 2638, "task_loss": 0.02624155394732952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6521416902542114, "epoch": 2.23, "learning_rate": 3.406593406593407e-05, "loss": 0.6273, "step": 2639, "task_loss": 0.5498329997062683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7566461563110352, "epoch": 2.23, "learning_rate": 3.405989614780824e-05, "loss": 0.7467, "step": 2640, "task_loss": 1.1999878883361816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5538116693496704, "epoch": 2.23, "learning_rate": 3.4053858229682404e-05, "loss": 0.7327, "step": 2641, "task_loss": 1.9610590934753418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4187253415584564, "epoch": 2.23, "learning_rate": 3.404782031155658e-05, "loss": 0.6686, "step": 2642, "task_loss": 1.2978270053863525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7021353244781494, "epoch": 2.23, "learning_rate": 3.404178239343075e-05, "loss": 0.6555, "step": 2643, "task_loss": 0.9532296061515808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5864866375923157, "epoch": 2.23, "learning_rate": 3.403574447530491e-05, "loss": 0.6113, "step": 2644, "task_loss": 0.6666108965873718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6116681694984436, "epoch": 2.24, "learning_rate": 3.4029706557179086e-05, "loss": 0.7042, "step": 2645, "task_loss": 1.3746082782745361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5805075764656067, "epoch": 2.24, "learning_rate": 3.402366863905326e-05, "loss": 0.6662, "step": 2646, "task_loss": 0.3192768394947052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4770742356777191, "epoch": 2.24, "learning_rate": 3.401763072092742e-05, "loss": 0.5269, "step": 2647, "task_loss": 0.4491952657699585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3582439422607422, "epoch": 2.24, "learning_rate": 3.4011592802801594e-05, "loss": 0.5199, "step": 2648, "task_loss": 0.558834433555603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4516242742538452, "epoch": 2.24, "learning_rate": 3.400555488467577e-05, "loss": 0.6587, "step": 2649, "task_loss": 0.5816102623939514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8450843691825867, "epoch": 2.24, "learning_rate": 3.3999516966549936e-05, "loss": 0.6157, "step": 2650, "task_loss": 1.233894944190979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5599538087844849, "epoch": 2.24, "learning_rate": 3.39934790484241e-05, "loss": 0.7907, "step": 2651, "task_loss": 0.525175154209137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5610092878341675, "epoch": 2.24, "learning_rate": 3.398744113029828e-05, "loss": 0.6225, "step": 2652, "task_loss": 0.6139942407608032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46849921345710754, "epoch": 2.24, "learning_rate": 3.3981403212172444e-05, "loss": 0.5478, "step": 2653, "task_loss": 1.2626956701278687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.569875180721283, "epoch": 2.24, "learning_rate": 3.397536529404661e-05, "loss": 0.5476, "step": 2654, "task_loss": 1.5239254236221313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49449944496154785, "epoch": 2.24, "learning_rate": 3.3969327375920785e-05, "loss": 0.5256, "step": 2655, "task_loss": 0.2840801477432251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7613524198532104, "epoch": 2.24, "learning_rate": 3.396328945779496e-05, "loss": 0.863, "step": 2656, "task_loss": 1.1425716876983643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7014766931533813, "epoch": 2.25, "learning_rate": 3.395725153966912e-05, "loss": 0.7638, "step": 2657, "task_loss": 1.8168034553527832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.3157107830047607, "epoch": 2.25, "learning_rate": 3.395121362154329e-05, "loss": 0.8987, "step": 2658, "task_loss": 2.1560657024383545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8421964049339294, "epoch": 2.25, "learning_rate": 3.394517570341747e-05, "loss": 0.7754, "step": 2659, "task_loss": 0.8056140542030334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6035676002502441, "epoch": 2.25, "learning_rate": 3.393913778529163e-05, "loss": 0.6515, "step": 2660, "task_loss": 0.6324442625045776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5127855539321899, "epoch": 2.25, "learning_rate": 3.39330998671658e-05, "loss": 0.621, "step": 2661, "task_loss": 0.5379679799079895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4989939033985138, "epoch": 2.25, "learning_rate": 3.3927061949039976e-05, "loss": 0.7099, "step": 2662, "task_loss": 0.6581425070762634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6243914365768433, "epoch": 2.25, "learning_rate": 3.392102403091414e-05, "loss": 0.6601, "step": 2663, "task_loss": 1.225645661354065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4747631847858429, "epoch": 2.25, "learning_rate": 3.391498611278831e-05, "loss": 0.4414, "step": 2664, "task_loss": 0.4177016317844391 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7747278809547424, "epoch": 2.25, "learning_rate": 3.3908948194662484e-05, "loss": 0.7084, "step": 2665, "task_loss": 1.1071856021881104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0929222106933594, "epoch": 2.25, "learning_rate": 3.390291027653665e-05, "loss": 0.6305, "step": 2666, "task_loss": 1.3245470523834229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9765191078186035, "epoch": 2.25, "learning_rate": 3.389687235841082e-05, "loss": 0.9332, "step": 2667, "task_loss": 0.5607027411460876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8091405630111694, "epoch": 2.26, "learning_rate": 3.389083444028499e-05, "loss": 0.7944, "step": 2668, "task_loss": 1.2394814491271973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9554165601730347, "epoch": 2.26, "learning_rate": 3.388479652215916e-05, "loss": 0.7027, "step": 2669, "task_loss": 1.2550103664398193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4429936408996582, "epoch": 2.26, "learning_rate": 3.387875860403333e-05, "loss": 0.5099, "step": 2670, "task_loss": 0.11318476498126984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8526570796966553, "epoch": 2.26, "learning_rate": 3.38727206859075e-05, "loss": 0.6655, "step": 2671, "task_loss": 0.9419602155685425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.751725971698761, "epoch": 2.26, "learning_rate": 3.386668276778167e-05, "loss": 0.5617, "step": 2672, "task_loss": 1.0172244310379028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6345117688179016, "epoch": 2.26, "learning_rate": 3.386064484965584e-05, "loss": 0.5111, "step": 2673, "task_loss": 0.7925119400024414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9003385305404663, "epoch": 2.26, "learning_rate": 3.385460693153001e-05, "loss": 0.5999, "step": 2674, "task_loss": 1.1736645698547363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0219637155532837, "epoch": 2.26, "learning_rate": 3.384856901340418e-05, "loss": 0.7004, "step": 2675, "task_loss": 0.4667414426803589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0454342365264893, "epoch": 2.26, "learning_rate": 3.384253109527835e-05, "loss": 0.83, "step": 2676, "task_loss": 0.6524487733840942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7942463159561157, "epoch": 2.26, "learning_rate": 3.383649317715252e-05, "loss": 0.6553, "step": 2677, "task_loss": 0.5513631701469421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4609701633453369, "epoch": 2.26, "learning_rate": 3.383045525902669e-05, "loss": 0.769, "step": 2678, "task_loss": 0.592752993106842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4303951561450958, "epoch": 2.26, "learning_rate": 3.382441734090086e-05, "loss": 0.6223, "step": 2679, "task_loss": 0.19830019772052765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8047993183135986, "epoch": 2.27, "learning_rate": 3.3818379422775026e-05, "loss": 0.8093, "step": 2680, "task_loss": 0.5432954430580139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.542350172996521, "epoch": 2.27, "learning_rate": 3.38123415046492e-05, "loss": 0.5575, "step": 2681, "task_loss": 0.9161694049835205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4339176416397095, "epoch": 2.27, "learning_rate": 3.380630358652337e-05, "loss": 0.6989, "step": 2682, "task_loss": 0.3771783709526062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2946106791496277, "epoch": 2.27, "learning_rate": 3.380026566839754e-05, "loss": 0.5013, "step": 2683, "task_loss": 0.3698568344116211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1576623916625977, "epoch": 2.27, "learning_rate": 3.379422775027171e-05, "loss": 0.748, "step": 2684, "task_loss": 0.6636067032814026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3914187550544739, "epoch": 2.27, "learning_rate": 3.3788189832145875e-05, "loss": 0.4151, "step": 2685, "task_loss": 1.4333109855651855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6356293559074402, "epoch": 2.27, "learning_rate": 3.378215191402005e-05, "loss": 0.563, "step": 2686, "task_loss": 0.3057313859462738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8696633577346802, "epoch": 2.27, "learning_rate": 3.3776113995894216e-05, "loss": 0.7421, "step": 2687, "task_loss": 0.38226792216300964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6390997171401978, "epoch": 2.27, "learning_rate": 3.377007607776838e-05, "loss": 0.6792, "step": 2688, "task_loss": 0.5755239129066467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4572407007217407, "epoch": 2.27, "learning_rate": 3.376403815964256e-05, "loss": 0.5528, "step": 2689, "task_loss": 0.6429844498634338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7626864314079285, "epoch": 2.27, "learning_rate": 3.3758000241516725e-05, "loss": 0.6213, "step": 2690, "task_loss": 1.6242361068725586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7320970296859741, "epoch": 2.27, "learning_rate": 3.37519623233909e-05, "loss": 0.5834, "step": 2691, "task_loss": 0.9467758536338806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4327676594257355, "epoch": 2.28, "learning_rate": 3.3745924405265066e-05, "loss": 0.4639, "step": 2692, "task_loss": 0.6254423260688782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.392842561006546, "epoch": 2.28, "learning_rate": 3.373988648713924e-05, "loss": 0.6192, "step": 2693, "task_loss": 0.4435555040836334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6814653873443604, "epoch": 2.28, "learning_rate": 3.373384856901341e-05, "loss": 0.7281, "step": 2694, "task_loss": 1.605644702911377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49557048082351685, "epoch": 2.28, "learning_rate": 3.3727810650887574e-05, "loss": 0.5623, "step": 2695, "task_loss": 1.020995855331421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3711540102958679, "epoch": 2.28, "learning_rate": 3.372177273276175e-05, "loss": 0.5111, "step": 2696, "task_loss": 0.742051899433136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5394238829612732, "epoch": 2.28, "learning_rate": 3.3715734814635915e-05, "loss": 0.6217, "step": 2697, "task_loss": 0.5072696208953857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42204415798187256, "epoch": 2.28, "learning_rate": 3.370969689651008e-05, "loss": 0.6453, "step": 2698, "task_loss": 0.991134524345398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1793253421783447, "epoch": 2.28, "learning_rate": 3.3703658978384256e-05, "loss": 0.8239, "step": 2699, "task_loss": 1.4583773612976074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46935105323791504, "epoch": 2.28, "learning_rate": 3.3697621060258423e-05, "loss": 0.5779, "step": 2700, "task_loss": 0.4040989279747009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.625400185585022, "epoch": 2.28, "learning_rate": 3.369158314213259e-05, "loss": 0.6321, "step": 2701, "task_loss": 1.4939088821411133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46049830317497253, "epoch": 2.28, "learning_rate": 3.3685545224006765e-05, "loss": 0.5578, "step": 2702, "task_loss": 0.2506542205810547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7380083799362183, "epoch": 2.28, "learning_rate": 3.367950730588094e-05, "loss": 0.7547, "step": 2703, "task_loss": 0.6873634457588196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8846030831336975, "epoch": 2.29, "learning_rate": 3.36734693877551e-05, "loss": 0.7315, "step": 2704, "task_loss": 0.8561525940895081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6801671981811523, "epoch": 2.29, "learning_rate": 3.366743146962927e-05, "loss": 0.8634, "step": 2705, "task_loss": 1.4676061868667603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41640254855155945, "epoch": 2.29, "learning_rate": 3.366139355150345e-05, "loss": 0.5665, "step": 2706, "task_loss": 0.8160548806190491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5691567659378052, "epoch": 2.29, "learning_rate": 3.3655355633377614e-05, "loss": 0.6306, "step": 2707, "task_loss": 0.39450332522392273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.629193902015686, "epoch": 2.29, "learning_rate": 3.364931771525178e-05, "loss": 0.6314, "step": 2708, "task_loss": 1.0437183380126953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6980026960372925, "epoch": 2.29, "learning_rate": 3.3643279797125955e-05, "loss": 0.5645, "step": 2709, "task_loss": 1.1700043678283691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38411420583724976, "epoch": 2.29, "learning_rate": 3.363724187900012e-05, "loss": 0.4069, "step": 2710, "task_loss": 0.6631032824516296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4109249711036682, "epoch": 2.29, "learning_rate": 3.363120396087429e-05, "loss": 0.7529, "step": 2711, "task_loss": 0.07277268171310425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4959256649017334, "epoch": 2.29, "learning_rate": 3.3625166042748464e-05, "loss": 0.5275, "step": 2712, "task_loss": 0.8799400329589844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46742361783981323, "epoch": 2.29, "learning_rate": 3.361912812462264e-05, "loss": 0.5151, "step": 2713, "task_loss": 0.16544200479984283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8594121932983398, "epoch": 2.29, "learning_rate": 3.36130902064968e-05, "loss": 0.8353, "step": 2714, "task_loss": 0.47208714485168457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8432763814926147, "epoch": 2.29, "learning_rate": 3.360705228837097e-05, "loss": 0.6761, "step": 2715, "task_loss": 1.429359793663025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8072237968444824, "epoch": 2.3, "learning_rate": 3.3601014370245146e-05, "loss": 0.7754, "step": 2716, "task_loss": 1.6003552675247192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38086289167404175, "epoch": 2.3, "learning_rate": 3.3594976452119306e-05, "loss": 0.4739, "step": 2717, "task_loss": 0.9192397594451904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39856868982315063, "epoch": 2.3, "learning_rate": 3.358893853399348e-05, "loss": 0.4943, "step": 2718, "task_loss": 1.0037541389465332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5616945028305054, "epoch": 2.3, "learning_rate": 3.3582900615867654e-05, "loss": 0.6461, "step": 2719, "task_loss": 0.7370935678482056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47286760807037354, "epoch": 2.3, "learning_rate": 3.3576862697741815e-05, "loss": 0.5989, "step": 2720, "task_loss": 1.177642822265625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6069406270980835, "epoch": 2.3, "learning_rate": 3.357082477961599e-05, "loss": 0.611, "step": 2721, "task_loss": 0.4100778102874756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5627729892730713, "epoch": 2.3, "learning_rate": 3.356478686149016e-05, "loss": 0.4013, "step": 2722, "task_loss": 0.3056902289390564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37770023941993713, "epoch": 2.3, "learning_rate": 3.355874894336433e-05, "loss": 0.618, "step": 2723, "task_loss": 0.6361449360847473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6895098090171814, "epoch": 2.3, "learning_rate": 3.35527110252385e-05, "loss": 0.6601, "step": 2724, "task_loss": 0.43686744570732117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5355291366577148, "epoch": 2.3, "learning_rate": 3.354667310711267e-05, "loss": 0.7038, "step": 2725, "task_loss": 0.6033897995948792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5298436284065247, "epoch": 2.3, "learning_rate": 3.354063518898684e-05, "loss": 0.7446, "step": 2726, "task_loss": 0.4861023426055908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43415188789367676, "epoch": 2.3, "learning_rate": 3.3534597270861005e-05, "loss": 0.5438, "step": 2727, "task_loss": 0.7806878089904785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8575556874275208, "epoch": 2.31, "learning_rate": 3.352855935273518e-05, "loss": 0.7996, "step": 2728, "task_loss": 0.7277016639709473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8144185543060303, "epoch": 2.31, "learning_rate": 3.352252143460935e-05, "loss": 0.7489, "step": 2729, "task_loss": 1.920546531677246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8677695989608765, "epoch": 2.31, "learning_rate": 3.3516483516483513e-05, "loss": 0.8103, "step": 2730, "task_loss": 0.45388349890708923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32260236144065857, "epoch": 2.31, "learning_rate": 3.351044559835769e-05, "loss": 0.4678, "step": 2731, "task_loss": 0.2385791689157486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6568719148635864, "epoch": 2.31, "learning_rate": 3.350440768023186e-05, "loss": 0.6416, "step": 2732, "task_loss": 0.587561845779419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5897412896156311, "epoch": 2.31, "learning_rate": 3.349836976210603e-05, "loss": 0.6443, "step": 2733, "task_loss": 0.7575717568397522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3765987455844879, "epoch": 2.31, "learning_rate": 3.3492331843980196e-05, "loss": 0.5227, "step": 2734, "task_loss": 0.5719147324562073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4877831041812897, "epoch": 2.31, "learning_rate": 3.348629392585437e-05, "loss": 0.7485, "step": 2735, "task_loss": 0.07227770239114761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46922093629837036, "epoch": 2.31, "learning_rate": 3.348025600772854e-05, "loss": 0.6399, "step": 2736, "task_loss": 0.29054921865463257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4348430931568146, "epoch": 2.31, "learning_rate": 3.3474218089602704e-05, "loss": 0.5284, "step": 2737, "task_loss": 0.9642353057861328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7801523804664612, "epoch": 2.31, "learning_rate": 3.346818017147688e-05, "loss": 0.6722, "step": 2738, "task_loss": 1.3326172828674316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43285736441612244, "epoch": 2.32, "learning_rate": 3.3462142253351045e-05, "loss": 0.5067, "step": 2739, "task_loss": 0.7724419832229614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5086011290550232, "epoch": 2.32, "learning_rate": 3.345610433522521e-05, "loss": 0.7637, "step": 2740, "task_loss": 1.4377247095108032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8792120814323425, "epoch": 2.32, "learning_rate": 3.3450066417099386e-05, "loss": 0.9623, "step": 2741, "task_loss": 0.8088839650154114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7639023661613464, "epoch": 2.32, "learning_rate": 3.3444028498973554e-05, "loss": 0.6296, "step": 2742, "task_loss": 1.2741889953613281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6958885192871094, "epoch": 2.32, "learning_rate": 3.343799058084773e-05, "loss": 0.6035, "step": 2743, "task_loss": 2.250288963317871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9366300106048584, "epoch": 2.32, "learning_rate": 3.3431952662721895e-05, "loss": 0.7563, "step": 2744, "task_loss": 0.4588649272918701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1058248281478882, "epoch": 2.32, "learning_rate": 3.342591474459606e-05, "loss": 0.7354, "step": 2745, "task_loss": 0.44825008511543274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47396549582481384, "epoch": 2.32, "learning_rate": 3.3419876826470236e-05, "loss": 0.5844, "step": 2746, "task_loss": 0.5525580644607544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.681865394115448, "epoch": 2.32, "learning_rate": 3.34138389083444e-05, "loss": 0.68, "step": 2747, "task_loss": 1.002031683921814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43215566873550415, "epoch": 2.32, "learning_rate": 3.340780099021858e-05, "loss": 0.5582, "step": 2748, "task_loss": 0.777837336063385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6499994993209839, "epoch": 2.32, "learning_rate": 3.3401763072092744e-05, "loss": 0.5485, "step": 2749, "task_loss": 1.7890537977218628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5643678903579712, "epoch": 2.32, "learning_rate": 3.339572515396691e-05, "loss": 0.5025, "step": 2750, "task_loss": 0.8483378291130066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6843158602714539, "epoch": 2.33, "learning_rate": 3.3389687235841085e-05, "loss": 0.7267, "step": 2751, "task_loss": 0.9465029835700989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6302250027656555, "epoch": 2.33, "learning_rate": 3.338364931771525e-05, "loss": 0.6673, "step": 2752, "task_loss": 0.5732086896896362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2811475992202759, "epoch": 2.33, "learning_rate": 3.3377611399589426e-05, "loss": 0.5508, "step": 2753, "task_loss": 0.3561860918998718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6984377503395081, "epoch": 2.33, "learning_rate": 3.3371573481463594e-05, "loss": 0.7445, "step": 2754, "task_loss": 0.9806138277053833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6083628535270691, "epoch": 2.33, "learning_rate": 3.336553556333776e-05, "loss": 0.579, "step": 2755, "task_loss": 0.7562330365180969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46773093938827515, "epoch": 2.33, "learning_rate": 3.3359497645211935e-05, "loss": 0.66, "step": 2756, "task_loss": 0.3968280553817749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5666074156761169, "epoch": 2.33, "learning_rate": 3.33534597270861e-05, "loss": 0.5826, "step": 2757, "task_loss": 0.7829583883285522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48683154582977295, "epoch": 2.33, "learning_rate": 3.334742180896027e-05, "loss": 0.579, "step": 2758, "task_loss": 0.44080013036727905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7041598558425903, "epoch": 2.33, "learning_rate": 3.334138389083444e-05, "loss": 0.6286, "step": 2759, "task_loss": 1.3958628177642822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6260026097297668, "epoch": 2.33, "learning_rate": 3.333534597270861e-05, "loss": 0.6487, "step": 2760, "task_loss": 0.9102373719215393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5646286606788635, "epoch": 2.33, "learning_rate": 3.332930805458278e-05, "loss": 0.4934, "step": 2761, "task_loss": 0.2656852900981903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7398630380630493, "epoch": 2.33, "learning_rate": 3.332327013645695e-05, "loss": 0.6642, "step": 2762, "task_loss": 1.382411241531372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8210527896881104, "epoch": 2.34, "learning_rate": 3.3317232218331125e-05, "loss": 0.6028, "step": 2763, "task_loss": 1.5291996002197266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6687321662902832, "epoch": 2.34, "learning_rate": 3.331119430020529e-05, "loss": 0.5558, "step": 2764, "task_loss": 0.8035946488380432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6612465381622314, "epoch": 2.34, "learning_rate": 3.330515638207946e-05, "loss": 0.4844, "step": 2765, "task_loss": 0.5854050517082214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5326452851295471, "epoch": 2.34, "learning_rate": 3.3299118463953634e-05, "loss": 0.5215, "step": 2766, "task_loss": 0.9476829171180725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6227434873580933, "epoch": 2.34, "learning_rate": 3.32930805458278e-05, "loss": 0.6072, "step": 2767, "task_loss": 0.5240973234176636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3564571142196655, "epoch": 2.34, "learning_rate": 3.328704262770197e-05, "loss": 0.616, "step": 2768, "task_loss": 0.09839734435081482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45204779505729675, "epoch": 2.34, "learning_rate": 3.328100470957614e-05, "loss": 0.5806, "step": 2769, "task_loss": 0.31881874799728394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8447141647338867, "epoch": 2.34, "learning_rate": 3.327496679145031e-05, "loss": 0.594, "step": 2770, "task_loss": 0.9034940004348755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9144049882888794, "epoch": 2.34, "learning_rate": 3.3268928873324476e-05, "loss": 0.6464, "step": 2771, "task_loss": 0.29734131693840027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5363218188285828, "epoch": 2.34, "learning_rate": 3.326289095519865e-05, "loss": 0.5805, "step": 2772, "task_loss": 1.5897819995880127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27432161569595337, "epoch": 2.34, "learning_rate": 3.3256853037072824e-05, "loss": 0.4929, "step": 2773, "task_loss": 0.8333072066307068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4134104251861572, "epoch": 2.34, "learning_rate": 3.3250815118946985e-05, "loss": 0.574, "step": 2774, "task_loss": 0.4348832368850708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6121277809143066, "epoch": 2.35, "learning_rate": 3.324477720082116e-05, "loss": 0.5994, "step": 2775, "task_loss": 0.3006189465522766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.560478150844574, "epoch": 2.35, "learning_rate": 3.323873928269533e-05, "loss": 0.6251, "step": 2776, "task_loss": 1.0094798803329468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7083741426467896, "epoch": 2.35, "learning_rate": 3.323270136456949e-05, "loss": 0.5225, "step": 2777, "task_loss": 0.7357813715934753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5041173696517944, "epoch": 2.35, "learning_rate": 3.322666344644367e-05, "loss": 0.4445, "step": 2778, "task_loss": 0.29980090260505676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6084166169166565, "epoch": 2.35, "learning_rate": 3.322062552831784e-05, "loss": 0.5841, "step": 2779, "task_loss": 0.9006649255752563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4408206343650818, "epoch": 2.35, "learning_rate": 3.321458761019201e-05, "loss": 0.6801, "step": 2780, "task_loss": 0.40420588850975037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3436402678489685, "epoch": 2.35, "learning_rate": 3.3208549692066175e-05, "loss": 0.604, "step": 2781, "task_loss": 1.2070915699005127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5139382481575012, "epoch": 2.35, "learning_rate": 3.320251177394035e-05, "loss": 0.6822, "step": 2782, "task_loss": 0.7160767316818237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6139983534812927, "epoch": 2.35, "learning_rate": 3.3196473855814516e-05, "loss": 0.569, "step": 2783, "task_loss": 0.9521323442459106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6410754919052124, "epoch": 2.35, "learning_rate": 3.3190435937688684e-05, "loss": 0.4327, "step": 2784, "task_loss": 0.9063396453857422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0506900548934937, "epoch": 2.35, "learning_rate": 3.318439801956286e-05, "loss": 0.8823, "step": 2785, "task_loss": 0.7631728649139404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6342801451683044, "epoch": 2.35, "learning_rate": 3.317836010143703e-05, "loss": 0.8439, "step": 2786, "task_loss": 0.9588278532028198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5530285835266113, "epoch": 2.36, "learning_rate": 3.317232218331119e-05, "loss": 0.738, "step": 2787, "task_loss": 1.088077425956726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43498480319976807, "epoch": 2.36, "learning_rate": 3.3166284265185366e-05, "loss": 0.5813, "step": 2788, "task_loss": 0.6242617964744568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8777176141738892, "epoch": 2.36, "learning_rate": 3.316024634705954e-05, "loss": 0.8205, "step": 2789, "task_loss": 0.9611509442329407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40628373622894287, "epoch": 2.36, "learning_rate": 3.31542084289337e-05, "loss": 0.631, "step": 2790, "task_loss": 0.5163662433624268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6069014668464661, "epoch": 2.36, "learning_rate": 3.3148170510807874e-05, "loss": 0.5598, "step": 2791, "task_loss": 0.7276567220687866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.780820369720459, "epoch": 2.36, "learning_rate": 3.314213259268205e-05, "loss": 0.6033, "step": 2792, "task_loss": 0.6724525690078735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1987322568893433, "epoch": 2.36, "learning_rate": 3.3136094674556215e-05, "loss": 0.7176, "step": 2793, "task_loss": 1.5199012756347656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5361475944519043, "epoch": 2.36, "learning_rate": 3.313005675643038e-05, "loss": 0.7746, "step": 2794, "task_loss": 0.5563704371452332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4461820125579834, "epoch": 2.36, "learning_rate": 3.3124018838304557e-05, "loss": 0.7269, "step": 2795, "task_loss": 0.6786623597145081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8231344819068909, "epoch": 2.36, "learning_rate": 3.3117980920178724e-05, "loss": 0.698, "step": 2796, "task_loss": 0.6138530969619751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4975292980670929, "epoch": 2.36, "learning_rate": 3.311194300205289e-05, "loss": 0.5799, "step": 2797, "task_loss": 0.8980240225791931 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.668231725692749, "epoch": 2.36, "learning_rate": 3.3105905083927065e-05, "loss": 0.5726, "step": 2798, "task_loss": 0.32051751017570496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5792865753173828, "epoch": 2.37, "learning_rate": 3.309986716580123e-05, "loss": 0.4983, "step": 2799, "task_loss": 1.1736302375793457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9329530000686646, "epoch": 2.37, "learning_rate": 3.30938292476754e-05, "loss": 0.643, "step": 2800, "task_loss": 0.9752650856971741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3679473102092743, "epoch": 2.37, "learning_rate": 3.308779132954957e-05, "loss": 0.5729, "step": 2801, "task_loss": 0.783528745174408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49544674158096313, "epoch": 2.37, "learning_rate": 3.308175341142374e-05, "loss": 0.5663, "step": 2802, "task_loss": 0.7515592575073242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6081817746162415, "epoch": 2.37, "learning_rate": 3.3075715493297914e-05, "loss": 0.5969, "step": 2803, "task_loss": 0.621507465839386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44013872742652893, "epoch": 2.37, "learning_rate": 3.306967757517208e-05, "loss": 0.7064, "step": 2804, "task_loss": 0.8408189415931702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6836858987808228, "epoch": 2.37, "learning_rate": 3.3063639657046255e-05, "loss": 0.673, "step": 2805, "task_loss": 1.309217929840088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5562867522239685, "epoch": 2.37, "learning_rate": 3.305760173892042e-05, "loss": 0.7213, "step": 2806, "task_loss": 0.7211648225784302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46764540672302246, "epoch": 2.37, "learning_rate": 3.305156382079459e-05, "loss": 0.624, "step": 2807, "task_loss": 0.6373986601829529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6296800374984741, "epoch": 2.37, "learning_rate": 3.3045525902668764e-05, "loss": 0.4928, "step": 2808, "task_loss": 1.1562979221343994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6109154224395752, "epoch": 2.37, "learning_rate": 3.303948798454293e-05, "loss": 0.4768, "step": 2809, "task_loss": 0.7308464646339417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7123075723648071, "epoch": 2.38, "learning_rate": 3.30334500664171e-05, "loss": 0.6065, "step": 2810, "task_loss": 0.7608975172042847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5486956238746643, "epoch": 2.38, "learning_rate": 3.302741214829127e-05, "loss": 0.7093, "step": 2811, "task_loss": 0.7031572461128235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6866428852081299, "epoch": 2.38, "learning_rate": 3.302137423016544e-05, "loss": 0.5807, "step": 2812, "task_loss": 0.5745305418968201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35208389163017273, "epoch": 2.38, "learning_rate": 3.301533631203961e-05, "loss": 0.5883, "step": 2813, "task_loss": 1.4585151672363281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4256187081336975, "epoch": 2.38, "learning_rate": 3.300929839391378e-05, "loss": 0.4905, "step": 2814, "task_loss": 0.29614943265914917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6201909780502319, "epoch": 2.38, "learning_rate": 3.300326047578795e-05, "loss": 0.6626, "step": 2815, "task_loss": 0.7178217768669128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6732895374298096, "epoch": 2.38, "learning_rate": 3.299722255766212e-05, "loss": 0.6625, "step": 2816, "task_loss": 0.7299395203590393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6647729873657227, "epoch": 2.38, "learning_rate": 3.299118463953629e-05, "loss": 0.5233, "step": 2817, "task_loss": 0.8399394154548645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30670925974845886, "epoch": 2.38, "learning_rate": 3.2985146721410456e-05, "loss": 0.5226, "step": 2818, "task_loss": 0.09771306812763214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4087892174720764, "epoch": 2.38, "learning_rate": 3.297910880328463e-05, "loss": 0.4598, "step": 2819, "task_loss": 0.4444400668144226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6689381003379822, "epoch": 2.38, "learning_rate": 3.29730708851588e-05, "loss": 0.4917, "step": 2820, "task_loss": 1.4054523706436157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0669728517532349, "epoch": 2.38, "learning_rate": 3.296703296703297e-05, "loss": 0.6452, "step": 2821, "task_loss": 0.6268378496170044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4681284725666046, "epoch": 2.39, "learning_rate": 3.296099504890714e-05, "loss": 0.5965, "step": 2822, "task_loss": 0.7232109904289246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5323065519332886, "epoch": 2.39, "learning_rate": 3.2954957130781305e-05, "loss": 0.5461, "step": 2823, "task_loss": 1.0424813032150269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40526527166366577, "epoch": 2.39, "learning_rate": 3.294891921265548e-05, "loss": 0.5379, "step": 2824, "task_loss": 0.4252692759037018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3764333128929138, "epoch": 2.39, "learning_rate": 3.2942881294529647e-05, "loss": 0.835, "step": 2825, "task_loss": 0.6285123229026794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7652390599250793, "epoch": 2.39, "learning_rate": 3.293684337640382e-05, "loss": 0.6082, "step": 2826, "task_loss": 0.8298907279968262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49225470423698425, "epoch": 2.39, "learning_rate": 3.293080545827799e-05, "loss": 0.5804, "step": 2827, "task_loss": 0.4475257694721222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4768330156803131, "epoch": 2.39, "learning_rate": 3.2924767540152155e-05, "loss": 0.5476, "step": 2828, "task_loss": 0.5845743417739868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4878935217857361, "epoch": 2.39, "learning_rate": 3.291872962202633e-05, "loss": 0.5603, "step": 2829, "task_loss": 1.3241593837738037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6116279363632202, "epoch": 2.39, "learning_rate": 3.2912691703900496e-05, "loss": 0.7082, "step": 2830, "task_loss": 0.47449108958244324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4723677933216095, "epoch": 2.39, "learning_rate": 3.290665378577466e-05, "loss": 0.5767, "step": 2831, "task_loss": 0.2858136296272278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5165239572525024, "epoch": 2.39, "learning_rate": 3.290061586764884e-05, "loss": 0.6284, "step": 2832, "task_loss": 0.3239780068397522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4609326720237732, "epoch": 2.39, "learning_rate": 3.2894577949523004e-05, "loss": 0.4556, "step": 2833, "task_loss": 0.7735064625740051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33696940541267395, "epoch": 2.4, "learning_rate": 3.288854003139717e-05, "loss": 0.6819, "step": 2834, "task_loss": 0.6627004742622375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3876693844795227, "epoch": 2.4, "learning_rate": 3.2882502113271346e-05, "loss": 0.5389, "step": 2835, "task_loss": 0.4795006215572357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.732984185218811, "epoch": 2.4, "learning_rate": 3.287646419514552e-05, "loss": 0.6297, "step": 2836, "task_loss": 1.2740187644958496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.484604150056839, "epoch": 2.4, "learning_rate": 3.287042627701969e-05, "loss": 0.5155, "step": 2837, "task_loss": 0.591458797454834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4378851056098938, "epoch": 2.4, "learning_rate": 3.2864388358893854e-05, "loss": 0.7277, "step": 2838, "task_loss": 0.4500393271446228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5216506719589233, "epoch": 2.4, "learning_rate": 3.285835044076803e-05, "loss": 0.5995, "step": 2839, "task_loss": 0.6803943514823914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5027228593826294, "epoch": 2.4, "learning_rate": 3.2852312522642195e-05, "loss": 0.6214, "step": 2840, "task_loss": 0.9790998101234436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48640987277030945, "epoch": 2.4, "learning_rate": 3.284627460451636e-05, "loss": 0.7043, "step": 2841, "task_loss": 1.4227588176727295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9625617265701294, "epoch": 2.4, "learning_rate": 3.2840236686390536e-05, "loss": 0.6644, "step": 2842, "task_loss": 0.5224199891090393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46689051389694214, "epoch": 2.4, "learning_rate": 3.28341987682647e-05, "loss": 0.6162, "step": 2843, "task_loss": 0.49498599767684937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3928931653499603, "epoch": 2.4, "learning_rate": 3.282816085013887e-05, "loss": 0.7715, "step": 2844, "task_loss": 0.5139907002449036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6458635330200195, "epoch": 2.4, "learning_rate": 3.2822122932013044e-05, "loss": 0.5253, "step": 2845, "task_loss": 0.6029022932052612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32326799631118774, "epoch": 2.41, "learning_rate": 3.281608501388722e-05, "loss": 0.6005, "step": 2846, "task_loss": 0.27409306168556213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5764963626861572, "epoch": 2.41, "learning_rate": 3.281004709576138e-05, "loss": 0.6385, "step": 2847, "task_loss": 0.8189858198165894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7281851768493652, "epoch": 2.41, "learning_rate": 3.280400917763555e-05, "loss": 0.677, "step": 2848, "task_loss": 0.34248557686805725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7666088938713074, "epoch": 2.41, "learning_rate": 3.279797125950973e-05, "loss": 0.8752, "step": 2849, "task_loss": 0.42437294125556946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8788058757781982, "epoch": 2.41, "learning_rate": 3.279193334138389e-05, "loss": 0.657, "step": 2850, "task_loss": 0.5898709893226624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6753508448600769, "epoch": 2.41, "learning_rate": 3.278589542325806e-05, "loss": 0.5582, "step": 2851, "task_loss": 0.4467654526233673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4347940683364868, "epoch": 2.41, "learning_rate": 3.2779857505132235e-05, "loss": 0.6333, "step": 2852, "task_loss": 1.076852798461914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3389336168766022, "epoch": 2.41, "learning_rate": 3.27738195870064e-05, "loss": 0.6317, "step": 2853, "task_loss": 0.11729887127876282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7760937213897705, "epoch": 2.41, "learning_rate": 3.276778166888057e-05, "loss": 0.6422, "step": 2854, "task_loss": 0.7475221753120422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7142821550369263, "epoch": 2.41, "learning_rate": 3.276174375075474e-05, "loss": 0.6245, "step": 2855, "task_loss": 1.3388965129852295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48236343264579773, "epoch": 2.41, "learning_rate": 3.275570583262891e-05, "loss": 0.6009, "step": 2856, "task_loss": 1.0317808389663696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33504003286361694, "epoch": 2.41, "learning_rate": 3.274966791450308e-05, "loss": 0.5153, "step": 2857, "task_loss": 1.0252275466918945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8159103393554688, "epoch": 2.42, "learning_rate": 3.274362999637725e-05, "loss": 0.6019, "step": 2858, "task_loss": 1.3009151220321655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7553880214691162, "epoch": 2.42, "learning_rate": 3.273759207825142e-05, "loss": 0.6697, "step": 2859, "task_loss": 0.5085935592651367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42015382647514343, "epoch": 2.42, "learning_rate": 3.2731554160125586e-05, "loss": 0.5537, "step": 2860, "task_loss": 0.5719116926193237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6091506481170654, "epoch": 2.42, "learning_rate": 3.272551624199976e-05, "loss": 0.5297, "step": 2861, "task_loss": 1.0025396347045898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.536212682723999, "epoch": 2.42, "learning_rate": 3.2719478323873934e-05, "loss": 0.6427, "step": 2862, "task_loss": 0.5003145337104797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4331235885620117, "epoch": 2.42, "learning_rate": 3.2713440405748094e-05, "loss": 0.5927, "step": 2863, "task_loss": 1.1593923568725586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5048621296882629, "epoch": 2.42, "learning_rate": 3.270740248762227e-05, "loss": 0.4143, "step": 2864, "task_loss": 0.7019749283790588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41473686695098877, "epoch": 2.42, "learning_rate": 3.270136456949644e-05, "loss": 0.6468, "step": 2865, "task_loss": 0.11740263551473618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7408928275108337, "epoch": 2.42, "learning_rate": 3.269532665137061e-05, "loss": 0.7261, "step": 2866, "task_loss": 1.162772297859192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5453040599822998, "epoch": 2.42, "learning_rate": 3.268928873324478e-05, "loss": 0.62, "step": 2867, "task_loss": 0.21947363018989563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42235898971557617, "epoch": 2.42, "learning_rate": 3.268325081511895e-05, "loss": 0.5257, "step": 2868, "task_loss": 0.7535826563835144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34398654103279114, "epoch": 2.42, "learning_rate": 3.267721289699312e-05, "loss": 0.6317, "step": 2869, "task_loss": 0.3219203054904938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9197566509246826, "epoch": 2.43, "learning_rate": 3.2671174978867285e-05, "loss": 0.6235, "step": 2870, "task_loss": 0.5501262545585632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5773872137069702, "epoch": 2.43, "learning_rate": 3.266513706074146e-05, "loss": 0.5525, "step": 2871, "task_loss": 1.0739257335662842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9475022554397583, "epoch": 2.43, "learning_rate": 3.2659099142615626e-05, "loss": 0.7217, "step": 2872, "task_loss": 0.7980193495750427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4029132127761841, "epoch": 2.43, "learning_rate": 3.265306122448979e-05, "loss": 0.4247, "step": 2873, "task_loss": 0.7516294717788696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5290862321853638, "epoch": 2.43, "learning_rate": 3.264702330636397e-05, "loss": 0.5183, "step": 2874, "task_loss": 0.3069802522659302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7835856080055237, "epoch": 2.43, "learning_rate": 3.2640985388238134e-05, "loss": 0.768, "step": 2875, "task_loss": 1.9966663122177124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5237045288085938, "epoch": 2.43, "learning_rate": 3.263494747011231e-05, "loss": 0.5419, "step": 2876, "task_loss": 0.7912609577178955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5864595174789429, "epoch": 2.43, "learning_rate": 3.2628909551986476e-05, "loss": 0.4881, "step": 2877, "task_loss": 0.7694368362426758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.770035982131958, "epoch": 2.43, "learning_rate": 3.262287163386065e-05, "loss": 0.7724, "step": 2878, "task_loss": 1.110260009765625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3587909936904907, "epoch": 2.43, "learning_rate": 3.261683371573482e-05, "loss": 0.4831, "step": 2879, "task_loss": 0.21643294394016266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7017934322357178, "epoch": 2.43, "learning_rate": 3.2610795797608984e-05, "loss": 0.5464, "step": 2880, "task_loss": 0.6179145574569702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.036116600036621, "epoch": 2.44, "learning_rate": 3.260475787948316e-05, "loss": 0.7032, "step": 2881, "task_loss": 0.7335235476493835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7934672832489014, "epoch": 2.44, "learning_rate": 3.2598719961357325e-05, "loss": 0.6382, "step": 2882, "task_loss": 0.7901463508605957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8496864438056946, "epoch": 2.44, "learning_rate": 3.259268204323149e-05, "loss": 0.5305, "step": 2883, "task_loss": 0.6312875747680664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4108898937702179, "epoch": 2.44, "learning_rate": 3.2586644125105666e-05, "loss": 0.4445, "step": 2884, "task_loss": 0.17121045291423798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.026235580444336, "epoch": 2.44, "learning_rate": 3.2580606206979833e-05, "loss": 0.5851, "step": 2885, "task_loss": 0.8576310276985168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5468770861625671, "epoch": 2.44, "learning_rate": 3.257456828885401e-05, "loss": 0.8292, "step": 2886, "task_loss": 0.8485537171363831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3534657955169678, "epoch": 2.44, "learning_rate": 3.2568530370728175e-05, "loss": 0.5193, "step": 2887, "task_loss": 0.3923889994621277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40614718198776245, "epoch": 2.44, "learning_rate": 3.256249245260234e-05, "loss": 0.513, "step": 2888, "task_loss": 0.4681490659713745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.412251353263855, "epoch": 2.44, "learning_rate": 3.2556454534476516e-05, "loss": 0.4915, "step": 2889, "task_loss": 0.518799364566803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.572196900844574, "epoch": 2.44, "learning_rate": 3.255041661635068e-05, "loss": 0.6286, "step": 2890, "task_loss": 0.43729501962661743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5428762435913086, "epoch": 2.44, "learning_rate": 3.254437869822485e-05, "loss": 0.5507, "step": 2891, "task_loss": 0.17710742354393005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41789165139198303, "epoch": 2.44, "learning_rate": 3.2538340780099024e-05, "loss": 0.8263, "step": 2892, "task_loss": 0.11740151047706604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4326915144920349, "epoch": 2.45, "learning_rate": 3.253230286197319e-05, "loss": 0.4633, "step": 2893, "task_loss": 0.26004502177238464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4795076847076416, "epoch": 2.45, "learning_rate": 3.2526264943847365e-05, "loss": 0.5919, "step": 2894, "task_loss": 0.5128042697906494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8897210359573364, "epoch": 2.45, "learning_rate": 3.252022702572153e-05, "loss": 0.6595, "step": 2895, "task_loss": 0.4237530529499054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5470967292785645, "epoch": 2.45, "learning_rate": 3.2514189107595706e-05, "loss": 0.5296, "step": 2896, "task_loss": 0.22075428068637848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8534783124923706, "epoch": 2.45, "learning_rate": 3.2508151189469873e-05, "loss": 0.7233, "step": 2897, "task_loss": 0.953711986541748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3072449564933777, "epoch": 2.45, "learning_rate": 3.250211327134404e-05, "loss": 0.5119, "step": 2898, "task_loss": 0.38608840107917786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4856056272983551, "epoch": 2.45, "learning_rate": 3.2496075353218215e-05, "loss": 0.4898, "step": 2899, "task_loss": 0.5014217495918274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8282320499420166, "epoch": 2.45, "learning_rate": 3.249003743509238e-05, "loss": 0.8089, "step": 2900, "task_loss": 0.34289270639419556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7086615562438965, "epoch": 2.45, "learning_rate": 3.248399951696655e-05, "loss": 0.6803, "step": 2901, "task_loss": 0.9358624219894409 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5321891903877258, "epoch": 2.45, "learning_rate": 3.247796159884072e-05, "loss": 0.5462, "step": 2902, "task_loss": 0.3115391731262207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5674741268157959, "epoch": 2.45, "learning_rate": 3.247192368071489e-05, "loss": 0.5297, "step": 2903, "task_loss": 0.1851855218410492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35012972354888916, "epoch": 2.45, "learning_rate": 3.246588576258906e-05, "loss": 0.5778, "step": 2904, "task_loss": 0.47021928429603577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7302767038345337, "epoch": 2.46, "learning_rate": 3.245984784446323e-05, "loss": 0.6807, "step": 2905, "task_loss": 1.3651877641677856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7471790313720703, "epoch": 2.46, "learning_rate": 3.2453809926337405e-05, "loss": 0.6719, "step": 2906, "task_loss": 1.161827564239502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3312314748764038, "epoch": 2.46, "learning_rate": 3.2447772008211566e-05, "loss": 0.5743, "step": 2907, "task_loss": 1.3019781112670898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2825240194797516, "epoch": 2.46, "learning_rate": 3.244173409008574e-05, "loss": 0.4663, "step": 2908, "task_loss": 0.3736572861671448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7227689027786255, "epoch": 2.46, "learning_rate": 3.2435696171959914e-05, "loss": 0.653, "step": 2909, "task_loss": 1.4502679109573364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8360554575920105, "epoch": 2.46, "learning_rate": 3.242965825383408e-05, "loss": 0.741, "step": 2910, "task_loss": 0.8718031048774719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3669199049472809, "epoch": 2.46, "learning_rate": 3.242362033570825e-05, "loss": 0.6157, "step": 2911, "task_loss": 0.5217685699462891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3515700697898865, "epoch": 2.46, "learning_rate": 3.241758241758242e-05, "loss": 0.5221, "step": 2912, "task_loss": 0.998009204864502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8380647301673889, "epoch": 2.46, "learning_rate": 3.241154449945659e-05, "loss": 0.6623, "step": 2913, "task_loss": 0.24320557713508606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6418933868408203, "epoch": 2.46, "learning_rate": 3.2405506581330756e-05, "loss": 0.6289, "step": 2914, "task_loss": 0.552386999130249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3929491341114044, "epoch": 2.46, "learning_rate": 3.239946866320493e-05, "loss": 0.4057, "step": 2915, "task_loss": 0.5693463683128357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7663474678993225, "epoch": 2.46, "learning_rate": 3.2393430745079104e-05, "loss": 0.798, "step": 2916, "task_loss": 1.347758412361145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3591029942035675, "epoch": 2.47, "learning_rate": 3.2387392826953265e-05, "loss": 0.6927, "step": 2917, "task_loss": 0.41073960065841675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5896298885345459, "epoch": 2.47, "learning_rate": 3.238135490882744e-05, "loss": 0.6244, "step": 2918, "task_loss": 0.4494938552379608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3736576437950134, "epoch": 2.47, "learning_rate": 3.237531699070161e-05, "loss": 0.4841, "step": 2919, "task_loss": 0.18769992887973785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.744586169719696, "epoch": 2.47, "learning_rate": 3.236927907257577e-05, "loss": 0.5423, "step": 2920, "task_loss": 0.7756705284118652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5330781936645508, "epoch": 2.47, "learning_rate": 3.236324115444995e-05, "loss": 0.6516, "step": 2921, "task_loss": 0.6967970728874207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5067437291145325, "epoch": 2.47, "learning_rate": 3.235720323632412e-05, "loss": 0.6309, "step": 2922, "task_loss": 0.6427402496337891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9518039226531982, "epoch": 2.47, "learning_rate": 3.235116531819828e-05, "loss": 0.6707, "step": 2923, "task_loss": 0.24187149107456207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3596474230289459, "epoch": 2.47, "learning_rate": 3.2345127400072455e-05, "loss": 0.7114, "step": 2924, "task_loss": 1.641987919807434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35315075516700745, "epoch": 2.47, "learning_rate": 3.233908948194663e-05, "loss": 0.5646, "step": 2925, "task_loss": 0.6433652639389038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6596123576164246, "epoch": 2.47, "learning_rate": 3.2333051563820796e-05, "loss": 0.5493, "step": 2926, "task_loss": 0.8057993650436401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44282376766204834, "epoch": 2.47, "learning_rate": 3.2327013645694964e-05, "loss": 0.4707, "step": 2927, "task_loss": 0.4897988438606262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29243195056915283, "epoch": 2.47, "learning_rate": 3.232097572756914e-05, "loss": 0.5264, "step": 2928, "task_loss": 0.5671036243438721 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.402302622795105, "epoch": 2.48, "learning_rate": 3.2314937809443305e-05, "loss": 0.6223, "step": 2929, "task_loss": 0.41088807582855225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3667314648628235, "epoch": 2.48, "learning_rate": 3.230889989131747e-05, "loss": 0.4893, "step": 2930, "task_loss": 0.7560425996780396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3578633964061737, "epoch": 2.48, "learning_rate": 3.2302861973191646e-05, "loss": 0.4239, "step": 2931, "task_loss": 0.2645043730735779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7927392721176147, "epoch": 2.48, "learning_rate": 3.229682405506581e-05, "loss": 0.6943, "step": 2932, "task_loss": 1.0406783819198608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6393892765045166, "epoch": 2.48, "learning_rate": 3.229078613693998e-05, "loss": 0.5293, "step": 2933, "task_loss": 0.444949746131897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5779492855072021, "epoch": 2.48, "learning_rate": 3.2284748218814154e-05, "loss": 0.6079, "step": 2934, "task_loss": 0.88968425989151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3608275353908539, "epoch": 2.48, "learning_rate": 3.227871030068833e-05, "loss": 0.4664, "step": 2935, "task_loss": 0.0704147145152092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6070787906646729, "epoch": 2.48, "learning_rate": 3.2272672382562495e-05, "loss": 0.6241, "step": 2936, "task_loss": 0.583966851234436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42149803042411804, "epoch": 2.48, "learning_rate": 3.226663446443666e-05, "loss": 0.7899, "step": 2937, "task_loss": 0.3538093566894531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20733052492141724, "epoch": 2.48, "learning_rate": 3.2260596546310836e-05, "loss": 0.5122, "step": 2938, "task_loss": 0.05386026203632355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.2228631973266602, "epoch": 2.48, "learning_rate": 3.2254558628185004e-05, "loss": 0.7336, "step": 2939, "task_loss": 1.4865357875823975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6279694437980652, "epoch": 2.48, "learning_rate": 3.224852071005917e-05, "loss": 0.5364, "step": 2940, "task_loss": 0.6013134717941284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6831156611442566, "epoch": 2.49, "learning_rate": 3.2242482791933345e-05, "loss": 0.6547, "step": 2941, "task_loss": 1.441482663154602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7897087931632996, "epoch": 2.49, "learning_rate": 3.223644487380751e-05, "loss": 0.5637, "step": 2942, "task_loss": 1.3913180828094482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5917617082595825, "epoch": 2.49, "learning_rate": 3.223040695568168e-05, "loss": 0.671, "step": 2943, "task_loss": 0.7978176474571228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37000730633735657, "epoch": 2.49, "learning_rate": 3.222436903755585e-05, "loss": 0.5052, "step": 2944, "task_loss": 0.5714560747146606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.147936463356018, "epoch": 2.49, "learning_rate": 3.221833111943002e-05, "loss": 0.6737, "step": 2945, "task_loss": 1.156559705734253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.344260573387146, "epoch": 2.49, "learning_rate": 3.2212293201304194e-05, "loss": 0.5577, "step": 2946, "task_loss": 0.8494448661804199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5031419396400452, "epoch": 2.49, "learning_rate": 3.220625528317836e-05, "loss": 0.4695, "step": 2947, "task_loss": 1.388334035873413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4522409439086914, "epoch": 2.49, "learning_rate": 3.220021736505253e-05, "loss": 0.5255, "step": 2948, "task_loss": 0.29512348771095276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8315565586090088, "epoch": 2.49, "learning_rate": 3.21941794469267e-05, "loss": 0.7865, "step": 2949, "task_loss": 1.0558066368103027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3166593909263611, "epoch": 2.49, "learning_rate": 3.218814152880087e-05, "loss": 0.5552, "step": 2950, "task_loss": 0.5417840480804443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41014230251312256, "epoch": 2.49, "learning_rate": 3.2182103610675044e-05, "loss": 0.5136, "step": 2951, "task_loss": 0.5912534594535828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4118714928627014, "epoch": 2.5, "learning_rate": 3.217606569254921e-05, "loss": 0.6238, "step": 2952, "task_loss": 0.814059853553772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5922936201095581, "epoch": 2.5, "learning_rate": 3.217002777442338e-05, "loss": 0.6193, "step": 2953, "task_loss": 0.425933837890625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41618210077285767, "epoch": 2.5, "learning_rate": 3.216398985629755e-05, "loss": 0.4729, "step": 2954, "task_loss": 0.3698175549507141 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4148009717464447, "epoch": 2.5, "learning_rate": 3.215795193817172e-05, "loss": 0.5927, "step": 2955, "task_loss": 1.3631229400634766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7126175165176392, "epoch": 2.5, "learning_rate": 3.215191402004589e-05, "loss": 0.7946, "step": 2956, "task_loss": 1.0127736330032349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6662474870681763, "epoch": 2.5, "learning_rate": 3.214587610192006e-05, "loss": 0.673, "step": 2957, "task_loss": 0.3942853510379791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.528279185295105, "epoch": 2.5, "learning_rate": 3.213983818379423e-05, "loss": 0.5525, "step": 2958, "task_loss": 0.8126226663589478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43773046135902405, "epoch": 2.5, "learning_rate": 3.21338002656684e-05, "loss": 0.3824, "step": 2959, "task_loss": 0.16547031700611115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49893274903297424, "epoch": 2.5, "learning_rate": 3.212776234754257e-05, "loss": 0.4914, "step": 2960, "task_loss": 0.22712118923664093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34703129529953003, "epoch": 2.5, "learning_rate": 3.2121724429416736e-05, "loss": 0.6941, "step": 2961, "task_loss": 0.9430565237998962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5194660425186157, "epoch": 2.5, "learning_rate": 3.211568651129091e-05, "loss": 0.5574, "step": 2962, "task_loss": 1.2828110456466675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5064191818237305, "epoch": 2.5, "learning_rate": 3.210964859316508e-05, "loss": 0.5416, "step": 2963, "task_loss": 0.5206031203269958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6643453240394592, "epoch": 2.51, "learning_rate": 3.2103610675039244e-05, "loss": 0.5542, "step": 2964, "task_loss": 0.5530579090118408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5781809091567993, "epoch": 2.51, "learning_rate": 3.209757275691342e-05, "loss": 0.6226, "step": 2965, "task_loss": 0.14351163804531097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48747724294662476, "epoch": 2.51, "learning_rate": 3.209153483878759e-05, "loss": 0.4889, "step": 2966, "task_loss": 0.7593963742256165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3394874930381775, "epoch": 2.51, "learning_rate": 3.208549692066176e-05, "loss": 0.5491, "step": 2967, "task_loss": 0.45778369903564453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6799695491790771, "epoch": 2.51, "learning_rate": 3.2079459002535926e-05, "loss": 0.5344, "step": 2968, "task_loss": 0.5064207911491394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9664282202720642, "epoch": 2.51, "learning_rate": 3.20734210844101e-05, "loss": 0.7183, "step": 2969, "task_loss": 1.1330395936965942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49355706572532654, "epoch": 2.51, "learning_rate": 3.206738316628427e-05, "loss": 0.5394, "step": 2970, "task_loss": 1.1943659782409668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2977921664714813, "epoch": 2.51, "learning_rate": 3.2061345248158435e-05, "loss": 0.3853, "step": 2971, "task_loss": 0.39077454805374146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6454964280128479, "epoch": 2.51, "learning_rate": 3.205530733003261e-05, "loss": 0.6196, "step": 2972, "task_loss": 0.5578420162200928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48980626463890076, "epoch": 2.51, "learning_rate": 3.2049269411906776e-05, "loss": 0.5668, "step": 2973, "task_loss": 1.0293186902999878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0728622674942017, "epoch": 2.51, "learning_rate": 3.204323149378094e-05, "loss": 0.6858, "step": 2974, "task_loss": 0.5834866166114807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5859804749488831, "epoch": 2.51, "learning_rate": 3.203719357565512e-05, "loss": 0.6402, "step": 2975, "task_loss": 1.44627046585083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7323035001754761, "epoch": 2.52, "learning_rate": 3.203115565752929e-05, "loss": 0.6426, "step": 2976, "task_loss": 1.228921890258789 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22146755456924438, "epoch": 2.52, "learning_rate": 3.202511773940345e-05, "loss": 0.4912, "step": 2977, "task_loss": 0.13665051758289337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6878565549850464, "epoch": 2.52, "learning_rate": 3.2019079821277625e-05, "loss": 0.4739, "step": 2978, "task_loss": 0.38843458890914917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7628998756408691, "epoch": 2.52, "learning_rate": 3.20130419031518e-05, "loss": 0.5425, "step": 2979, "task_loss": 0.23421356081962585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9596214890480042, "epoch": 2.52, "learning_rate": 3.200700398502596e-05, "loss": 0.5971, "step": 2980, "task_loss": 1.0549513101577759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4929158091545105, "epoch": 2.52, "learning_rate": 3.2000966066900134e-05, "loss": 0.5961, "step": 2981, "task_loss": 0.4262428879737854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48288261890411377, "epoch": 2.52, "learning_rate": 3.199492814877431e-05, "loss": 0.6261, "step": 2982, "task_loss": 0.4161030948162079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5035767555236816, "epoch": 2.52, "learning_rate": 3.1988890230648475e-05, "loss": 0.5763, "step": 2983, "task_loss": 0.6840193867683411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5509455800056458, "epoch": 2.52, "learning_rate": 3.198285231252264e-05, "loss": 0.6568, "step": 2984, "task_loss": 0.7101704478263855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6895180940628052, "epoch": 2.52, "learning_rate": 3.1976814394396816e-05, "loss": 0.5854, "step": 2985, "task_loss": 0.29455968737602234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27555081248283386, "epoch": 2.52, "learning_rate": 3.197077647627098e-05, "loss": 0.4324, "step": 2986, "task_loss": 0.4367891252040863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7895499467849731, "epoch": 2.52, "learning_rate": 3.196473855814515e-05, "loss": 0.5736, "step": 2987, "task_loss": 0.8419411778450012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5723545551300049, "epoch": 2.53, "learning_rate": 3.1958700640019324e-05, "loss": 0.5836, "step": 2988, "task_loss": 0.6189712285995483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45740994811058044, "epoch": 2.53, "learning_rate": 3.195266272189349e-05, "loss": 0.4512, "step": 2989, "task_loss": 0.3710614740848541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6809113025665283, "epoch": 2.53, "learning_rate": 3.194662480376766e-05, "loss": 0.5279, "step": 2990, "task_loss": 0.4501570165157318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4839143753051758, "epoch": 2.53, "learning_rate": 3.194058688564183e-05, "loss": 0.6329, "step": 2991, "task_loss": 0.9948633313179016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5098437070846558, "epoch": 2.53, "learning_rate": 3.1934548967516007e-05, "loss": 0.6273, "step": 2992, "task_loss": 0.7761217951774597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6057577133178711, "epoch": 2.53, "learning_rate": 3.192851104939017e-05, "loss": 0.5931, "step": 2993, "task_loss": 0.7050639390945435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5868501663208008, "epoch": 2.53, "learning_rate": 3.192247313126434e-05, "loss": 0.4753, "step": 2994, "task_loss": 0.6220855712890625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9255300760269165, "epoch": 2.53, "learning_rate": 3.1916435213138515e-05, "loss": 0.6456, "step": 2995, "task_loss": 0.6522166728973389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3706604242324829, "epoch": 2.53, "learning_rate": 3.1910397295012675e-05, "loss": 0.5798, "step": 2996, "task_loss": 0.7707183957099915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1803672313690186, "epoch": 2.53, "learning_rate": 3.190435937688685e-05, "loss": 0.568, "step": 2997, "task_loss": 0.6819121837615967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5269731283187866, "epoch": 2.53, "learning_rate": 3.189832145876102e-05, "loss": 0.5267, "step": 2998, "task_loss": 0.7054276466369629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6359848380088806, "epoch": 2.53, "learning_rate": 3.189228354063519e-05, "loss": 0.7408, "step": 2999, "task_loss": 0.6293384432792664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.414407879114151, "epoch": 2.54, "learning_rate": 3.188624562250936e-05, "loss": 0.6605, "step": 3000, "task_loss": 0.17926809191703796 }, { "epoch": 2.54, "eval_accuracy": 0.9012673267326733, "eval_loss": 0.3550887107849121, "eval_runtime": 336.2837, "eval_samples_per_second": 75.085, "eval_steps_per_second": 0.589, "step": 3000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43658608198165894, "epoch": 2.54, "learning_rate": 3.188020770438353e-05, "loss": 0.551, "step": 3001, "task_loss": 0.206820547580719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4702722728252411, "epoch": 2.54, "learning_rate": 3.18741697862577e-05, "loss": 0.6405, "step": 3002, "task_loss": 0.9350981116294861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31491619348526, "epoch": 2.54, "learning_rate": 3.1868131868131866e-05, "loss": 0.5801, "step": 3003, "task_loss": 0.8625852465629578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.393322229385376, "epoch": 2.54, "learning_rate": 3.186209395000604e-05, "loss": 0.577, "step": 3004, "task_loss": 0.08078984916210175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.443943589925766, "epoch": 2.54, "learning_rate": 3.185605603188021e-05, "loss": 0.5065, "step": 3005, "task_loss": 0.962027907371521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6225208640098572, "epoch": 2.54, "learning_rate": 3.1850018113754374e-05, "loss": 0.7175, "step": 3006, "task_loss": 1.3350872993469238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3694877028465271, "epoch": 2.54, "learning_rate": 3.184398019562855e-05, "loss": 0.431, "step": 3007, "task_loss": 0.5138337016105652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5766944885253906, "epoch": 2.54, "learning_rate": 3.183794227750272e-05, "loss": 0.5652, "step": 3008, "task_loss": 0.6652193665504456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6287504434585571, "epoch": 2.54, "learning_rate": 3.183190435937689e-05, "loss": 0.5511, "step": 3009, "task_loss": 1.18569815158844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4671112895011902, "epoch": 2.54, "learning_rate": 3.1825866441251057e-05, "loss": 0.4595, "step": 3010, "task_loss": 0.664934515953064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.632016658782959, "epoch": 2.54, "learning_rate": 3.181982852312523e-05, "loss": 0.5279, "step": 3011, "task_loss": 0.8740168809890747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3223557472229004, "epoch": 2.55, "learning_rate": 3.18137906049994e-05, "loss": 0.3518, "step": 3012, "task_loss": 0.23773247003555298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3895799219608307, "epoch": 2.55, "learning_rate": 3.1807752686873565e-05, "loss": 0.5045, "step": 3013, "task_loss": 0.6074259877204895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.799383282661438, "epoch": 2.55, "learning_rate": 3.180171476874774e-05, "loss": 0.535, "step": 3014, "task_loss": 0.8679770231246948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6086822152137756, "epoch": 2.55, "learning_rate": 3.1795676850621906e-05, "loss": 0.6358, "step": 3015, "task_loss": 0.5712372064590454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6957310438156128, "epoch": 2.55, "learning_rate": 3.178963893249607e-05, "loss": 0.7966, "step": 3016, "task_loss": 0.9685741662979126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7119141221046448, "epoch": 2.55, "learning_rate": 3.178360101437025e-05, "loss": 0.6995, "step": 3017, "task_loss": 1.0255916118621826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6701250672340393, "epoch": 2.55, "learning_rate": 3.1777563096244414e-05, "loss": 0.4948, "step": 3018, "task_loss": 0.6882222890853882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.681232750415802, "epoch": 2.55, "learning_rate": 3.177152517811859e-05, "loss": 0.6658, "step": 3019, "task_loss": 0.8754422068595886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22738096117973328, "epoch": 2.55, "learning_rate": 3.1765487259992755e-05, "loss": 0.4102, "step": 3020, "task_loss": 0.3214612305164337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41991910338401794, "epoch": 2.55, "learning_rate": 3.175944934186692e-05, "loss": 0.5049, "step": 3021, "task_loss": 0.49330398440361023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.046569585800171, "epoch": 2.55, "learning_rate": 3.17534114237411e-05, "loss": 0.6352, "step": 3022, "task_loss": 1.5672941207885742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4338778257369995, "epoch": 2.56, "learning_rate": 3.1747373505615264e-05, "loss": 0.5574, "step": 3023, "task_loss": 1.4628918170928955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4300194978713989, "epoch": 2.56, "learning_rate": 3.174133558748944e-05, "loss": 0.5421, "step": 3024, "task_loss": 0.1643616259098053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6281716823577881, "epoch": 2.56, "learning_rate": 3.1735297669363605e-05, "loss": 0.5929, "step": 3025, "task_loss": 0.7235777974128723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6131654977798462, "epoch": 2.56, "learning_rate": 3.172925975123777e-05, "loss": 0.6857, "step": 3026, "task_loss": 1.0966228246688843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6098487377166748, "epoch": 2.56, "learning_rate": 3.1723221833111946e-05, "loss": 0.6959, "step": 3027, "task_loss": 0.852335512638092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2755972146987915, "epoch": 2.56, "learning_rate": 3.171718391498611e-05, "loss": 0.5314, "step": 3028, "task_loss": 0.22663360834121704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5111315250396729, "epoch": 2.56, "learning_rate": 3.171114599686029e-05, "loss": 0.6989, "step": 3029, "task_loss": 0.6067384481430054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8689610362052917, "epoch": 2.56, "learning_rate": 3.1705108078734454e-05, "loss": 0.461, "step": 3030, "task_loss": 0.8647289276123047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6540648937225342, "epoch": 2.56, "learning_rate": 3.169907016060862e-05, "loss": 0.5497, "step": 3031, "task_loss": 1.1088628768920898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48809611797332764, "epoch": 2.56, "learning_rate": 3.1693032242482796e-05, "loss": 0.6025, "step": 3032, "task_loss": 0.7831252217292786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4997727870941162, "epoch": 2.56, "learning_rate": 3.168699432435696e-05, "loss": 0.6564, "step": 3033, "task_loss": 0.8634251952171326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8575547933578491, "epoch": 2.56, "learning_rate": 3.168095640623113e-05, "loss": 0.6943, "step": 3034, "task_loss": 2.315263271331787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5979822874069214, "epoch": 2.57, "learning_rate": 3.1674918488105304e-05, "loss": 0.6151, "step": 3035, "task_loss": 1.0978392362594604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6975315809249878, "epoch": 2.57, "learning_rate": 3.166888056997947e-05, "loss": 0.5718, "step": 3036, "task_loss": 1.7038705348968506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3611481487751007, "epoch": 2.57, "learning_rate": 3.166284265185364e-05, "loss": 0.5915, "step": 3037, "task_loss": 0.4306747615337372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4248060882091522, "epoch": 2.57, "learning_rate": 3.165680473372781e-05, "loss": 0.4629, "step": 3038, "task_loss": 1.146122694015503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.721474826335907, "epoch": 2.57, "learning_rate": 3.1650766815601986e-05, "loss": 0.5863, "step": 3039, "task_loss": 1.2650773525238037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6566260457038879, "epoch": 2.57, "learning_rate": 3.164472889747615e-05, "loss": 0.6253, "step": 3040, "task_loss": 1.181181788444519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7062250375747681, "epoch": 2.57, "learning_rate": 3.163869097935032e-05, "loss": 0.688, "step": 3041, "task_loss": 0.8184826970100403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4957979619503021, "epoch": 2.57, "learning_rate": 3.1632653061224494e-05, "loss": 0.4465, "step": 3042, "task_loss": 1.0489330291748047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.3719663619995117, "epoch": 2.57, "learning_rate": 3.162661514309866e-05, "loss": 0.855, "step": 3043, "task_loss": 1.1340440511703491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5093420743942261, "epoch": 2.57, "learning_rate": 3.162057722497283e-05, "loss": 0.5879, "step": 3044, "task_loss": 0.6181932091712952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6852911710739136, "epoch": 2.57, "learning_rate": 3.1614539306847e-05, "loss": 0.7313, "step": 3045, "task_loss": 0.974119246006012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4047572612762451, "epoch": 2.57, "learning_rate": 3.160850138872117e-05, "loss": 0.5116, "step": 3046, "task_loss": 0.1525602638721466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5810412764549255, "epoch": 2.58, "learning_rate": 3.160246347059534e-05, "loss": 0.4654, "step": 3047, "task_loss": 0.41366297006607056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8525897860527039, "epoch": 2.58, "learning_rate": 3.159642555246951e-05, "loss": 0.5539, "step": 3048, "task_loss": 1.8460111618041992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.553343653678894, "epoch": 2.58, "learning_rate": 3.1590387634343685e-05, "loss": 0.6257, "step": 3049, "task_loss": 1.1973322629928589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1103678941726685, "epoch": 2.58, "learning_rate": 3.1584349716217846e-05, "loss": 0.8512, "step": 3050, "task_loss": 0.3846682012081146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7412424087524414, "epoch": 2.58, "learning_rate": 3.157831179809202e-05, "loss": 0.5527, "step": 3051, "task_loss": 1.0514795780181885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1389185190200806, "epoch": 2.58, "learning_rate": 3.1572273879966193e-05, "loss": 0.6444, "step": 3052, "task_loss": 0.8781148791313171 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4839778244495392, "epoch": 2.58, "learning_rate": 3.1566235961840354e-05, "loss": 0.5449, "step": 3053, "task_loss": 0.8286348581314087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41638538241386414, "epoch": 2.58, "learning_rate": 3.156019804371453e-05, "loss": 0.4401, "step": 3054, "task_loss": 0.5622422695159912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7078648805618286, "epoch": 2.58, "learning_rate": 3.15541601255887e-05, "loss": 0.7693, "step": 3055, "task_loss": 1.2607444524765015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43196409940719604, "epoch": 2.58, "learning_rate": 3.154812220746287e-05, "loss": 0.5404, "step": 3056, "task_loss": 0.9137256741523743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30233436822891235, "epoch": 2.58, "learning_rate": 3.1542084289337036e-05, "loss": 0.4265, "step": 3057, "task_loss": 0.37320053577423096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37178027629852295, "epoch": 2.58, "learning_rate": 3.153604637121121e-05, "loss": 0.6543, "step": 3058, "task_loss": 1.2685492038726807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21882490813732147, "epoch": 2.59, "learning_rate": 3.153000845308538e-05, "loss": 0.4182, "step": 3059, "task_loss": 0.05716512352228165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40480393171310425, "epoch": 2.59, "learning_rate": 3.1523970534959544e-05, "loss": 0.5031, "step": 3060, "task_loss": 0.9153790473937988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40661242604255676, "epoch": 2.59, "learning_rate": 3.151793261683372e-05, "loss": 0.5823, "step": 3061, "task_loss": 0.5139671564102173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6444534659385681, "epoch": 2.59, "learning_rate": 3.1511894698707886e-05, "loss": 0.6773, "step": 3062, "task_loss": 1.0788401365280151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5531365275382996, "epoch": 2.59, "learning_rate": 3.150585678058205e-05, "loss": 0.8016, "step": 3063, "task_loss": 0.8503678441047668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4501515030860901, "epoch": 2.59, "learning_rate": 3.149981886245623e-05, "loss": 0.5711, "step": 3064, "task_loss": 1.0843968391418457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8872722387313843, "epoch": 2.59, "learning_rate": 3.14937809443304e-05, "loss": 0.6268, "step": 3065, "task_loss": 0.8899143934249878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3328050374984741, "epoch": 2.59, "learning_rate": 3.148774302620456e-05, "loss": 0.3814, "step": 3066, "task_loss": 0.4451062083244324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44811758399009705, "epoch": 2.59, "learning_rate": 3.1481705108078735e-05, "loss": 0.4917, "step": 3067, "task_loss": 0.5147121548652649 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6116326451301575, "epoch": 2.59, "learning_rate": 3.147566718995291e-05, "loss": 0.5015, "step": 3068, "task_loss": 0.8442555665969849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4729713797569275, "epoch": 2.59, "learning_rate": 3.1469629271827076e-05, "loss": 0.5615, "step": 3069, "task_loss": 1.055061936378479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26743969321250916, "epoch": 2.59, "learning_rate": 3.146359135370124e-05, "loss": 0.5132, "step": 3070, "task_loss": 0.5553213953971863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6915301084518433, "epoch": 2.6, "learning_rate": 3.145755343557542e-05, "loss": 0.6659, "step": 3071, "task_loss": 1.4327071905136108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6283551454544067, "epoch": 2.6, "learning_rate": 3.1451515517449585e-05, "loss": 0.5487, "step": 3072, "task_loss": 0.35306286811828613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4547485411167145, "epoch": 2.6, "learning_rate": 3.144547759932375e-05, "loss": 0.6823, "step": 3073, "task_loss": 0.88480544090271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5392305254936218, "epoch": 2.6, "learning_rate": 3.1439439681197926e-05, "loss": 0.5177, "step": 3074, "task_loss": 0.34602269530296326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4525147080421448, "epoch": 2.6, "learning_rate": 3.143340176307209e-05, "loss": 0.4708, "step": 3075, "task_loss": 0.5885899662971497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5197232961654663, "epoch": 2.6, "learning_rate": 3.142736384494626e-05, "loss": 0.6127, "step": 3076, "task_loss": 1.259156584739685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3624635934829712, "epoch": 2.6, "learning_rate": 3.1421325926820434e-05, "loss": 0.4638, "step": 3077, "task_loss": 1.2175028324127197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38624659180641174, "epoch": 2.6, "learning_rate": 3.14152880086946e-05, "loss": 0.494, "step": 3078, "task_loss": 0.6910259127616882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8234136700630188, "epoch": 2.6, "learning_rate": 3.1409250090568775e-05, "loss": 0.5226, "step": 3079, "task_loss": 0.9123654961585999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5328894853591919, "epoch": 2.6, "learning_rate": 3.140321217244294e-05, "loss": 0.5968, "step": 3080, "task_loss": 0.3305851221084595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5333681106567383, "epoch": 2.6, "learning_rate": 3.1397174254317116e-05, "loss": 0.6812, "step": 3081, "task_loss": 0.9403561949729919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5307839512825012, "epoch": 2.6, "learning_rate": 3.1391136336191283e-05, "loss": 0.5981, "step": 3082, "task_loss": 1.1259934902191162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40721645951271057, "epoch": 2.61, "learning_rate": 3.138509841806545e-05, "loss": 0.4885, "step": 3083, "task_loss": 0.6500892043113708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6663417816162109, "epoch": 2.61, "learning_rate": 3.1379060499939625e-05, "loss": 0.5457, "step": 3084, "task_loss": 1.341795563697815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6981385946273804, "epoch": 2.61, "learning_rate": 3.137302258181379e-05, "loss": 0.5239, "step": 3085, "task_loss": 0.9905421733856201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42248767614364624, "epoch": 2.61, "learning_rate": 3.136698466368796e-05, "loss": 0.4761, "step": 3086, "task_loss": 0.1908734142780304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.333567351102829, "epoch": 2.61, "learning_rate": 3.136094674556213e-05, "loss": 0.4312, "step": 3087, "task_loss": 0.3608177900314331 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4839281141757965, "epoch": 2.61, "learning_rate": 3.13549088274363e-05, "loss": 0.6958, "step": 3088, "task_loss": 0.9797853231430054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3614383339881897, "epoch": 2.61, "learning_rate": 3.1348870909310474e-05, "loss": 0.4884, "step": 3089, "task_loss": 0.38326770067214966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3943230211734772, "epoch": 2.61, "learning_rate": 3.134283299118464e-05, "loss": 0.4837, "step": 3090, "task_loss": 0.5186907052993774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.617715060710907, "epoch": 2.61, "learning_rate": 3.133679507305881e-05, "loss": 0.4717, "step": 3091, "task_loss": 0.16252771019935608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34674111008644104, "epoch": 2.61, "learning_rate": 3.133075715493298e-05, "loss": 0.4351, "step": 3092, "task_loss": 0.9474419951438904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6893690824508667, "epoch": 2.61, "learning_rate": 3.132471923680715e-05, "loss": 0.5919, "step": 3093, "task_loss": 1.0146546363830566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8046548366546631, "epoch": 2.61, "learning_rate": 3.131868131868132e-05, "loss": 0.6078, "step": 3094, "task_loss": 0.7618483901023865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40461912751197815, "epoch": 2.62, "learning_rate": 3.131264340055549e-05, "loss": 0.6837, "step": 3095, "task_loss": 0.9483104348182678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39579451084136963, "epoch": 2.62, "learning_rate": 3.130660548242966e-05, "loss": 0.5426, "step": 3096, "task_loss": 0.3109184205532074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6089165210723877, "epoch": 2.62, "learning_rate": 3.130056756430383e-05, "loss": 0.4582, "step": 3097, "task_loss": 0.647036075592041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45473670959472656, "epoch": 2.62, "learning_rate": 3.1294529646178e-05, "loss": 0.524, "step": 3098, "task_loss": 1.2037817239761353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5189012289047241, "epoch": 2.62, "learning_rate": 3.128849172805217e-05, "loss": 0.4869, "step": 3099, "task_loss": 0.9567164778709412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4907497763633728, "epoch": 2.62, "learning_rate": 3.128245380992634e-05, "loss": 0.5983, "step": 3100, "task_loss": 0.5482205152511597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6670905947685242, "epoch": 2.62, "learning_rate": 3.127641589180051e-05, "loss": 0.5318, "step": 3101, "task_loss": 1.7884080410003662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4513081908226013, "epoch": 2.62, "learning_rate": 3.127037797367468e-05, "loss": 0.5859, "step": 3102, "task_loss": 1.0610734224319458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.353179395198822, "epoch": 2.62, "learning_rate": 3.126434005554885e-05, "loss": 0.5392, "step": 3103, "task_loss": 0.1113758459687233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4510411024093628, "epoch": 2.62, "learning_rate": 3.1258302137423016e-05, "loss": 0.4817, "step": 3104, "task_loss": 0.5293875932693481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3470337390899658, "epoch": 2.62, "learning_rate": 3.125226421929719e-05, "loss": 0.4902, "step": 3105, "task_loss": 0.2473694086074829 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2790483832359314, "epoch": 2.63, "learning_rate": 3.124622630117136e-05, "loss": 0.5384, "step": 3106, "task_loss": 0.23984269797801971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.587673008441925, "epoch": 2.63, "learning_rate": 3.1240188383045524e-05, "loss": 0.479, "step": 3107, "task_loss": 0.27161914110183716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3689296841621399, "epoch": 2.63, "learning_rate": 3.12341504649197e-05, "loss": 0.5778, "step": 3108, "task_loss": 0.6506506204605103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4823654890060425, "epoch": 2.63, "learning_rate": 3.122811254679387e-05, "loss": 0.5626, "step": 3109, "task_loss": 0.8713473081588745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3520318567752838, "epoch": 2.63, "learning_rate": 3.122207462866803e-05, "loss": 0.4913, "step": 3110, "task_loss": 0.37498822808265686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42476987838745117, "epoch": 2.63, "learning_rate": 3.1216036710542206e-05, "loss": 0.5452, "step": 3111, "task_loss": 0.9905713796615601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6528244018554688, "epoch": 2.63, "learning_rate": 3.120999879241638e-05, "loss": 0.6556, "step": 3112, "task_loss": 0.739977240562439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44800108671188354, "epoch": 2.63, "learning_rate": 3.120396087429055e-05, "loss": 0.4163, "step": 3113, "task_loss": 0.4454851448535919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.54795241355896, "epoch": 2.63, "learning_rate": 3.1197922956164715e-05, "loss": 0.5661, "step": 3114, "task_loss": 1.7704194784164429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9119563102722168, "epoch": 2.63, "learning_rate": 3.119188503803889e-05, "loss": 0.5563, "step": 3115, "task_loss": 0.6954388618469238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45026257634162903, "epoch": 2.63, "learning_rate": 3.1185847119913056e-05, "loss": 0.4817, "step": 3116, "task_loss": 0.8320226669311523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6895595788955688, "epoch": 2.63, "learning_rate": 3.117980920178722e-05, "loss": 0.539, "step": 3117, "task_loss": 1.4042401313781738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8737090826034546, "epoch": 2.64, "learning_rate": 3.11737712836614e-05, "loss": 0.6364, "step": 3118, "task_loss": 1.9792286157608032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5807375311851501, "epoch": 2.64, "learning_rate": 3.1167733365535564e-05, "loss": 0.4336, "step": 3119, "task_loss": 0.24984240531921387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.251182496547699, "epoch": 2.64, "learning_rate": 3.116169544740973e-05, "loss": 0.571, "step": 3120, "task_loss": 0.041671521961688995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2791842520236969, "epoch": 2.64, "learning_rate": 3.1155657529283905e-05, "loss": 0.4231, "step": 3121, "task_loss": 0.2998557984828949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3116554021835327, "epoch": 2.64, "learning_rate": 3.114961961115808e-05, "loss": 0.5257, "step": 3122, "task_loss": 0.08239234983921051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5533818602561951, "epoch": 2.64, "learning_rate": 3.114358169303224e-05, "loss": 0.5498, "step": 3123, "task_loss": 0.911818265914917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42376255989074707, "epoch": 2.64, "learning_rate": 3.1137543774906414e-05, "loss": 0.5472, "step": 3124, "task_loss": 0.6121549606323242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5263432264328003, "epoch": 2.64, "learning_rate": 3.113150585678059e-05, "loss": 0.5495, "step": 3125, "task_loss": 0.4927977919578552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18508455157279968, "epoch": 2.64, "learning_rate": 3.112546793865475e-05, "loss": 0.4751, "step": 3126, "task_loss": 0.16055083274841309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39457792043685913, "epoch": 2.64, "learning_rate": 3.111943002052892e-05, "loss": 0.5459, "step": 3127, "task_loss": 0.672694742679596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3309198319911957, "epoch": 2.64, "learning_rate": 3.1113392102403096e-05, "loss": 0.5313, "step": 3128, "task_loss": 0.721457302570343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37272799015045166, "epoch": 2.64, "learning_rate": 3.110735418427726e-05, "loss": 0.6614, "step": 3129, "task_loss": 0.5475585460662842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44334080815315247, "epoch": 2.65, "learning_rate": 3.110131626615143e-05, "loss": 0.5492, "step": 3130, "task_loss": 0.9232137203216553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44525477290153503, "epoch": 2.65, "learning_rate": 3.1095278348025604e-05, "loss": 0.5658, "step": 3131, "task_loss": 0.17069873213768005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2749066948890686, "epoch": 2.65, "learning_rate": 3.108924042989977e-05, "loss": 0.5333, "step": 3132, "task_loss": 0.025620276108384132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7170124650001526, "epoch": 2.65, "learning_rate": 3.108320251177394e-05, "loss": 0.5357, "step": 3133, "task_loss": 0.4833768904209137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.452114999294281, "epoch": 2.65, "learning_rate": 3.107716459364811e-05, "loss": 0.5525, "step": 3134, "task_loss": 0.9685155153274536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36930981278419495, "epoch": 2.65, "learning_rate": 3.107112667552228e-05, "loss": 0.6424, "step": 3135, "task_loss": 1.1915589570999146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5608888864517212, "epoch": 2.65, "learning_rate": 3.106508875739645e-05, "loss": 0.5529, "step": 3136, "task_loss": 0.5384951233863831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31910163164138794, "epoch": 2.65, "learning_rate": 3.105905083927062e-05, "loss": 0.4952, "step": 3137, "task_loss": 0.4855021834373474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30321377515792847, "epoch": 2.65, "learning_rate": 3.1053012921144795e-05, "loss": 0.4475, "step": 3138, "task_loss": 0.6564441323280334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5939916968345642, "epoch": 2.65, "learning_rate": 3.104697500301896e-05, "loss": 0.6201, "step": 3139, "task_loss": 1.1684492826461792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7223560810089111, "epoch": 2.65, "learning_rate": 3.104093708489313e-05, "loss": 0.6026, "step": 3140, "task_loss": 1.416212797164917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5104979276657104, "epoch": 2.65, "learning_rate": 3.10348991667673e-05, "loss": 0.5949, "step": 3141, "task_loss": 0.8956207633018494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35292744636535645, "epoch": 2.66, "learning_rate": 3.102886124864147e-05, "loss": 0.6916, "step": 3142, "task_loss": 0.25136932730674744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4764631986618042, "epoch": 2.66, "learning_rate": 3.102282333051564e-05, "loss": 0.5716, "step": 3143, "task_loss": 1.2492848634719849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2950657308101654, "epoch": 2.66, "learning_rate": 3.101678541238981e-05, "loss": 0.4879, "step": 3144, "task_loss": 0.13662946224212646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7230357527732849, "epoch": 2.66, "learning_rate": 3.101074749426398e-05, "loss": 0.5976, "step": 3145, "task_loss": 0.8085041046142578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4425351023674011, "epoch": 2.66, "learning_rate": 3.1004709576138146e-05, "loss": 0.5915, "step": 3146, "task_loss": 1.2783136367797852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9628309607505798, "epoch": 2.66, "learning_rate": 3.099867165801232e-05, "loss": 0.6403, "step": 3147, "task_loss": 1.2421389818191528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1932397037744522, "epoch": 2.66, "learning_rate": 3.099263373988649e-05, "loss": 0.5993, "step": 3148, "task_loss": 0.37501439452171326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7638342380523682, "epoch": 2.66, "learning_rate": 3.098659582176066e-05, "loss": 0.6165, "step": 3149, "task_loss": 0.5487585663795471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47390642762184143, "epoch": 2.66, "learning_rate": 3.098055790363483e-05, "loss": 0.4945, "step": 3150, "task_loss": 1.0245680809020996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7535247802734375, "epoch": 2.66, "learning_rate": 3.0974519985508995e-05, "loss": 0.6244, "step": 3151, "task_loss": 1.257050633430481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6857942342758179, "epoch": 2.66, "learning_rate": 3.096848206738317e-05, "loss": 0.5789, "step": 3152, "task_loss": 0.4444390535354614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48833054304122925, "epoch": 2.66, "learning_rate": 3.0962444149257336e-05, "loss": 0.4162, "step": 3153, "task_loss": 0.778516411781311 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5251964330673218, "epoch": 2.67, "learning_rate": 3.095640623113151e-05, "loss": 0.5728, "step": 3154, "task_loss": 0.3578792214393616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39988404512405396, "epoch": 2.67, "learning_rate": 3.095036831300568e-05, "loss": 0.5279, "step": 3155, "task_loss": 0.7044280171394348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6251711249351501, "epoch": 2.67, "learning_rate": 3.0944330394879845e-05, "loss": 0.7082, "step": 3156, "task_loss": 0.8150951862335205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2804487347602844, "epoch": 2.67, "learning_rate": 3.093829247675402e-05, "loss": 0.3735, "step": 3157, "task_loss": 0.07673975080251694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4253045618534088, "epoch": 2.67, "learning_rate": 3.0932254558628186e-05, "loss": 0.5757, "step": 3158, "task_loss": 1.1576353311538696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7746305465698242, "epoch": 2.67, "learning_rate": 3.092621664050236e-05, "loss": 0.6491, "step": 3159, "task_loss": 1.381866216659546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5830221176147461, "epoch": 2.67, "learning_rate": 3.092017872237653e-05, "loss": 0.5632, "step": 3160, "task_loss": 0.5934934020042419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7452920079231262, "epoch": 2.67, "learning_rate": 3.0914140804250694e-05, "loss": 0.5636, "step": 3161, "task_loss": 0.9802594184875488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7019983530044556, "epoch": 2.67, "learning_rate": 3.090810288612487e-05, "loss": 0.626, "step": 3162, "task_loss": 0.8460562825202942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7861446142196655, "epoch": 2.67, "learning_rate": 3.0902064967999035e-05, "loss": 0.5528, "step": 3163, "task_loss": 1.2349528074264526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7674472332000732, "epoch": 2.67, "learning_rate": 3.08960270498732e-05, "loss": 0.5757, "step": 3164, "task_loss": 1.607685923576355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5174866318702698, "epoch": 2.67, "learning_rate": 3.0889989131747376e-05, "loss": 0.4914, "step": 3165, "task_loss": 1.6805661916732788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6954083442687988, "epoch": 2.68, "learning_rate": 3.0883951213621544e-05, "loss": 0.5147, "step": 3166, "task_loss": 1.3156284093856812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6476148366928101, "epoch": 2.68, "learning_rate": 3.087791329549571e-05, "loss": 0.5173, "step": 3167, "task_loss": 0.9593360424041748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2833440899848938, "epoch": 2.68, "learning_rate": 3.0871875377369885e-05, "loss": 0.5748, "step": 3168, "task_loss": 0.05288232862949371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7427220940589905, "epoch": 2.68, "learning_rate": 3.086583745924406e-05, "loss": 0.5896, "step": 3169, "task_loss": 0.840416669845581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9700655937194824, "epoch": 2.68, "learning_rate": 3.0859799541118226e-05, "loss": 0.6104, "step": 3170, "task_loss": 1.3787147998809814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5490815043449402, "epoch": 2.68, "learning_rate": 3.085376162299239e-05, "loss": 0.6258, "step": 3171, "task_loss": 0.6850056052207947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6520341634750366, "epoch": 2.68, "learning_rate": 3.084772370486657e-05, "loss": 0.5904, "step": 3172, "task_loss": 0.4860934913158417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3115364909172058, "epoch": 2.68, "learning_rate": 3.0841685786740734e-05, "loss": 0.6556, "step": 3173, "task_loss": 0.21354660391807556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29548540711402893, "epoch": 2.68, "learning_rate": 3.08356478686149e-05, "loss": 0.4383, "step": 3174, "task_loss": 1.10287344455719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4578655958175659, "epoch": 2.68, "learning_rate": 3.0829609950489075e-05, "loss": 0.4065, "step": 3175, "task_loss": 1.1515915393829346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3062225580215454, "epoch": 2.68, "learning_rate": 3.082357203236324e-05, "loss": 0.4239, "step": 3176, "task_loss": 0.6192571520805359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32723745703697205, "epoch": 2.69, "learning_rate": 3.081753411423741e-05, "loss": 0.4623, "step": 3177, "task_loss": 0.6153802275657654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5458131432533264, "epoch": 2.69, "learning_rate": 3.0811496196111584e-05, "loss": 0.6299, "step": 3178, "task_loss": 1.7155654430389404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2978857159614563, "epoch": 2.69, "learning_rate": 3.080545827798575e-05, "loss": 0.4739, "step": 3179, "task_loss": 0.7441806793212891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5028952360153198, "epoch": 2.69, "learning_rate": 3.079942035985992e-05, "loss": 0.4384, "step": 3180, "task_loss": 0.6387867331504822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38600367307662964, "epoch": 2.69, "learning_rate": 3.079338244173409e-05, "loss": 0.5625, "step": 3181, "task_loss": 0.8321764469146729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38268834352493286, "epoch": 2.69, "learning_rate": 3.0787344523608266e-05, "loss": 0.4511, "step": 3182, "task_loss": 0.1741582751274109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5060417056083679, "epoch": 2.69, "learning_rate": 3.0781306605482426e-05, "loss": 0.4613, "step": 3183, "task_loss": 0.3619212508201599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37571510672569275, "epoch": 2.69, "learning_rate": 3.07752686873566e-05, "loss": 0.3706, "step": 3184, "task_loss": 0.4192965626716614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25331008434295654, "epoch": 2.69, "learning_rate": 3.0769230769230774e-05, "loss": 0.4367, "step": 3185, "task_loss": 0.23749879002571106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5780601501464844, "epoch": 2.69, "learning_rate": 3.076319285110494e-05, "loss": 0.3407, "step": 3186, "task_loss": 0.25818759202957153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6269645094871521, "epoch": 2.69, "learning_rate": 3.075715493297911e-05, "loss": 0.7085, "step": 3187, "task_loss": 0.5068871378898621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6756551861763, "epoch": 2.69, "learning_rate": 3.075111701485328e-05, "loss": 0.5345, "step": 3188, "task_loss": 1.8398998975753784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5551115274429321, "epoch": 2.7, "learning_rate": 3.074507909672745e-05, "loss": 0.5866, "step": 3189, "task_loss": 0.4338461458683014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.664405107498169, "epoch": 2.7, "learning_rate": 3.073904117860162e-05, "loss": 0.5963, "step": 3190, "task_loss": 0.49369314312934875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23177018761634827, "epoch": 2.7, "learning_rate": 3.073300326047579e-05, "loss": 0.4704, "step": 3191, "task_loss": 0.3774442672729492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3443692922592163, "epoch": 2.7, "learning_rate": 3.072696534234996e-05, "loss": 0.4855, "step": 3192, "task_loss": 0.4014906883239746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.768208920955658, "epoch": 2.7, "learning_rate": 3.0720927424224125e-05, "loss": 0.6225, "step": 3193, "task_loss": 1.598648190498352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41263607144355774, "epoch": 2.7, "learning_rate": 3.07148895060983e-05, "loss": 0.4603, "step": 3194, "task_loss": 0.7725002765655518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49216678738594055, "epoch": 2.7, "learning_rate": 3.070885158797247e-05, "loss": 0.3783, "step": 3195, "task_loss": 1.2360742092132568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4298912286758423, "epoch": 2.7, "learning_rate": 3.0702813669846634e-05, "loss": 0.5251, "step": 3196, "task_loss": 0.5016134977340698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3800694942474365, "epoch": 2.7, "learning_rate": 3.069677575172081e-05, "loss": 0.5204, "step": 3197, "task_loss": 0.33968403935432434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4088529348373413, "epoch": 2.7, "learning_rate": 3.069073783359498e-05, "loss": 0.4074, "step": 3198, "task_loss": 0.25095269083976746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43422651290893555, "epoch": 2.7, "learning_rate": 3.068469991546914e-05, "loss": 0.4623, "step": 3199, "task_loss": 0.8012699484825134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5857601165771484, "epoch": 2.7, "learning_rate": 3.0678661997343316e-05, "loss": 0.4622, "step": 3200, "task_loss": 1.8307114839553833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3837246894836426, "epoch": 2.71, "learning_rate": 3.067262407921749e-05, "loss": 0.4268, "step": 3201, "task_loss": 1.1936426162719727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5787670612335205, "epoch": 2.71, "learning_rate": 3.066658616109166e-05, "loss": 0.6273, "step": 3202, "task_loss": 1.3063682317733765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46916115283966064, "epoch": 2.71, "learning_rate": 3.0660548242965824e-05, "loss": 0.5731, "step": 3203, "task_loss": 0.8099662661552429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49811381101608276, "epoch": 2.71, "learning_rate": 3.065451032484e-05, "loss": 0.4694, "step": 3204, "task_loss": 0.39067724347114563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.525667667388916, "epoch": 2.71, "learning_rate": 3.0648472406714165e-05, "loss": 0.4391, "step": 3205, "task_loss": 0.6440207958221436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5872331261634827, "epoch": 2.71, "learning_rate": 3.064243448858833e-05, "loss": 0.6577, "step": 3206, "task_loss": 1.265313744544983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6674684286117554, "epoch": 2.71, "learning_rate": 3.0636396570462507e-05, "loss": 0.6423, "step": 3207, "task_loss": 0.29054608941078186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26754745841026306, "epoch": 2.71, "learning_rate": 3.0630358652336674e-05, "loss": 0.5239, "step": 3208, "task_loss": 0.5174580812454224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4079585671424866, "epoch": 2.71, "learning_rate": 3.062432073421084e-05, "loss": 0.5955, "step": 3209, "task_loss": 1.5144098997116089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29889437556266785, "epoch": 2.71, "learning_rate": 3.0618282816085015e-05, "loss": 0.4303, "step": 3210, "task_loss": 0.07024656236171722 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42257222533226013, "epoch": 2.71, "learning_rate": 3.061224489795919e-05, "loss": 0.5284, "step": 3211, "task_loss": 0.26325392723083496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6360597610473633, "epoch": 2.71, "learning_rate": 3.0606206979833356e-05, "loss": 0.5273, "step": 3212, "task_loss": 0.7259911894798279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4489091634750366, "epoch": 2.72, "learning_rate": 3.060016906170752e-05, "loss": 0.3512, "step": 3213, "task_loss": 0.45191895961761475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5106948018074036, "epoch": 2.72, "learning_rate": 3.05941311435817e-05, "loss": 0.5945, "step": 3214, "task_loss": 0.5507712960243225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4715074300765991, "epoch": 2.72, "learning_rate": 3.0588093225455864e-05, "loss": 0.4358, "step": 3215, "task_loss": 0.3588704466819763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.580095648765564, "epoch": 2.72, "learning_rate": 3.058205530733003e-05, "loss": 0.6498, "step": 3216, "task_loss": 0.6656926870346069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5653750896453857, "epoch": 2.72, "learning_rate": 3.0576017389204206e-05, "loss": 0.5389, "step": 3217, "task_loss": 0.31138649582862854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36835071444511414, "epoch": 2.72, "learning_rate": 3.056997947107837e-05, "loss": 0.5878, "step": 3218, "task_loss": 0.27957233786582947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5291355848312378, "epoch": 2.72, "learning_rate": 3.056394155295254e-05, "loss": 0.4832, "step": 3219, "task_loss": 0.31542453169822693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5809739828109741, "epoch": 2.72, "learning_rate": 3.0557903634826714e-05, "loss": 0.5056, "step": 3220, "task_loss": 0.49886906147003174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4397006034851074, "epoch": 2.72, "learning_rate": 3.055186571670088e-05, "loss": 0.447, "step": 3221, "task_loss": 0.5962256789207458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38032397627830505, "epoch": 2.72, "learning_rate": 3.0545827798575055e-05, "loss": 0.3848, "step": 3222, "task_loss": 0.7933246493339539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.559018075466156, "epoch": 2.72, "learning_rate": 3.053978988044922e-05, "loss": 0.5342, "step": 3223, "task_loss": 1.2698670625686646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4972827434539795, "epoch": 2.72, "learning_rate": 3.053375196232339e-05, "loss": 0.6263, "step": 3224, "task_loss": 0.7620044946670532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5312201380729675, "epoch": 2.73, "learning_rate": 3.052771404419756e-05, "loss": 0.5803, "step": 3225, "task_loss": 0.3638075888156891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.430900514125824, "epoch": 2.73, "learning_rate": 3.052167612607173e-05, "loss": 0.6516, "step": 3226, "task_loss": 1.1541829109191895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.868497908115387, "epoch": 2.73, "learning_rate": 3.05156382079459e-05, "loss": 0.623, "step": 3227, "task_loss": 0.4757522642612457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5810871720314026, "epoch": 2.73, "learning_rate": 3.050960028982007e-05, "loss": 0.6473, "step": 3228, "task_loss": 0.8574575185775757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.304290771484375, "epoch": 2.73, "learning_rate": 3.050356237169424e-05, "loss": 0.3983, "step": 3229, "task_loss": 0.3107747435569763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3554532527923584, "epoch": 2.73, "learning_rate": 3.049752445356841e-05, "loss": 0.4907, "step": 3230, "task_loss": 1.3671449422836304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5627246499061584, "epoch": 2.73, "learning_rate": 3.049148653544258e-05, "loss": 0.5987, "step": 3231, "task_loss": 0.9031329154968262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43434447050094604, "epoch": 2.73, "learning_rate": 3.0485448617316754e-05, "loss": 0.5386, "step": 3232, "task_loss": 0.5406363606452942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26349952816963196, "epoch": 2.73, "learning_rate": 3.0479410699190918e-05, "loss": 0.342, "step": 3233, "task_loss": 0.3519270718097687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4358598589897156, "epoch": 2.73, "learning_rate": 3.047337278106509e-05, "loss": 0.5146, "step": 3234, "task_loss": 0.23174291849136353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41070979833602905, "epoch": 2.73, "learning_rate": 3.0467334862939262e-05, "loss": 0.3978, "step": 3235, "task_loss": 0.47279468178749084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2085532695055008, "epoch": 2.73, "learning_rate": 3.046129694481343e-05, "loss": 0.4223, "step": 3236, "task_loss": 0.2876328229904175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2870514392852783, "epoch": 2.74, "learning_rate": 3.04552590266876e-05, "loss": 0.3677, "step": 3237, "task_loss": 0.3084774911403656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45128634572029114, "epoch": 2.74, "learning_rate": 3.044922110856177e-05, "loss": 0.4982, "step": 3238, "task_loss": 0.48651358485221863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3294084966182709, "epoch": 2.74, "learning_rate": 3.0443183190435938e-05, "loss": 0.5006, "step": 3239, "task_loss": 0.301012247800827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4394393563270569, "epoch": 2.74, "learning_rate": 3.043714527231011e-05, "loss": 0.5479, "step": 3240, "task_loss": 1.7544715404510498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49224233627319336, "epoch": 2.74, "learning_rate": 3.043110735418428e-05, "loss": 0.4349, "step": 3241, "task_loss": 0.4113180637359619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48700380325317383, "epoch": 2.74, "learning_rate": 3.042506943605845e-05, "loss": 0.4506, "step": 3242, "task_loss": 0.3703617453575134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3209194540977478, "epoch": 2.74, "learning_rate": 3.0419031517932617e-05, "loss": 0.4475, "step": 3243, "task_loss": 0.40592581033706665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4638032913208008, "epoch": 2.74, "learning_rate": 3.0412993599806787e-05, "loss": 0.642, "step": 3244, "task_loss": 0.7794869542121887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5062592029571533, "epoch": 2.74, "learning_rate": 3.040695568168096e-05, "loss": 0.4612, "step": 3245, "task_loss": 0.449448823928833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5690920352935791, "epoch": 2.74, "learning_rate": 3.0400917763555125e-05, "loss": 0.6635, "step": 3246, "task_loss": 0.8000437021255493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5993634462356567, "epoch": 2.74, "learning_rate": 3.0394879845429296e-05, "loss": 0.5685, "step": 3247, "task_loss": 0.44725093245506287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34532034397125244, "epoch": 2.75, "learning_rate": 3.038884192730347e-05, "loss": 0.4854, "step": 3248, "task_loss": 0.6222206354141235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6725810170173645, "epoch": 2.75, "learning_rate": 3.0382804009177633e-05, "loss": 0.5344, "step": 3249, "task_loss": 0.3820650279521942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4795305132865906, "epoch": 2.75, "learning_rate": 3.0376766091051807e-05, "loss": 0.5991, "step": 3250, "task_loss": 0.6683096885681152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4900311231613159, "epoch": 2.75, "learning_rate": 3.0370728172925978e-05, "loss": 0.7386, "step": 3251, "task_loss": 0.5798938274383545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8630433082580566, "epoch": 2.75, "learning_rate": 3.036469025480015e-05, "loss": 0.6583, "step": 3252, "task_loss": 0.5770004391670227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43686816096305847, "epoch": 2.75, "learning_rate": 3.0358652336674316e-05, "loss": 0.5138, "step": 3253, "task_loss": 0.38652104139328003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5813007950782776, "epoch": 2.75, "learning_rate": 3.0352614418548486e-05, "loss": 0.639, "step": 3254, "task_loss": 1.4196131229400635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.723459005355835, "epoch": 2.75, "learning_rate": 3.0346576500422657e-05, "loss": 0.7354, "step": 3255, "task_loss": 0.32382020354270935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42578771710395813, "epoch": 2.75, "learning_rate": 3.0340538582296824e-05, "loss": 0.4419, "step": 3256, "task_loss": 1.0294438600540161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4687573313713074, "epoch": 2.75, "learning_rate": 3.0334500664170994e-05, "loss": 0.4185, "step": 3257, "task_loss": 0.8259470462799072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45325958728790283, "epoch": 2.75, "learning_rate": 3.0328462746045165e-05, "loss": 0.4502, "step": 3258, "task_loss": 0.9170405268669128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27641332149505615, "epoch": 2.75, "learning_rate": 3.0322424827919332e-05, "loss": 0.5567, "step": 3259, "task_loss": 1.2403013706207275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7370938062667847, "epoch": 2.76, "learning_rate": 3.0316386909793503e-05, "loss": 0.5509, "step": 3260, "task_loss": 0.6009771823883057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5248786211013794, "epoch": 2.76, "learning_rate": 3.0310348991667677e-05, "loss": 0.5211, "step": 3261, "task_loss": 0.2109261304140091 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9017475843429565, "epoch": 2.76, "learning_rate": 3.0304311073541847e-05, "loss": 0.5152, "step": 3262, "task_loss": 1.189225196838379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48161202669143677, "epoch": 2.76, "learning_rate": 3.029827315541601e-05, "loss": 0.4895, "step": 3263, "task_loss": 0.6032607555389404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4994469881057739, "epoch": 2.76, "learning_rate": 3.0292235237290185e-05, "loss": 0.5073, "step": 3264, "task_loss": 0.3731783926486969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5414553284645081, "epoch": 2.76, "learning_rate": 3.0286197319164356e-05, "loss": 0.6009, "step": 3265, "task_loss": 1.255908727645874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48627597093582153, "epoch": 2.76, "learning_rate": 3.0280159401038523e-05, "loss": 0.5747, "step": 3266, "task_loss": 0.2147202044725418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35775768756866455, "epoch": 2.76, "learning_rate": 3.0274121482912693e-05, "loss": 0.4746, "step": 3267, "task_loss": 0.4030601978302002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5276824831962585, "epoch": 2.76, "learning_rate": 3.0268083564786864e-05, "loss": 0.6025, "step": 3268, "task_loss": 0.851517379283905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6443438529968262, "epoch": 2.76, "learning_rate": 3.026204564666103e-05, "loss": 0.5207, "step": 3269, "task_loss": 0.8468554019927979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5982266664505005, "epoch": 2.76, "learning_rate": 3.0256007728535202e-05, "loss": 0.5681, "step": 3270, "task_loss": 0.3511967062950134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30915772914886475, "epoch": 2.76, "learning_rate": 3.0249969810409372e-05, "loss": 0.7263, "step": 3271, "task_loss": 0.4788038432598114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.598492443561554, "epoch": 2.77, "learning_rate": 3.0243931892283546e-05, "loss": 0.5634, "step": 3272, "task_loss": 1.0226997137069702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.210773766040802, "epoch": 2.77, "learning_rate": 3.023789397415771e-05, "loss": 0.434, "step": 3273, "task_loss": 1.388162612915039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5002515912055969, "epoch": 2.77, "learning_rate": 3.023185605603188e-05, "loss": 0.3931, "step": 3274, "task_loss": 0.294077605009079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3727152943611145, "epoch": 2.77, "learning_rate": 3.0225818137906055e-05, "loss": 0.3784, "step": 3275, "task_loss": 0.12860733270645142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4901689887046814, "epoch": 2.77, "learning_rate": 3.021978021978022e-05, "loss": 0.4818, "step": 3276, "task_loss": 1.0354458093643188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4156952500343323, "epoch": 2.77, "learning_rate": 3.0213742301654392e-05, "loss": 0.4833, "step": 3277, "task_loss": 0.15790198743343353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.440599650144577, "epoch": 2.77, "learning_rate": 3.0207704383528563e-05, "loss": 0.5404, "step": 3278, "task_loss": 0.5470650792121887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7631182074546814, "epoch": 2.77, "learning_rate": 3.0201666465402727e-05, "loss": 0.5038, "step": 3279, "task_loss": 1.2479286193847656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5729435682296753, "epoch": 2.77, "learning_rate": 3.01956285472769e-05, "loss": 0.6478, "step": 3280, "task_loss": 0.4066108465194702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4004093408584595, "epoch": 2.77, "learning_rate": 3.018959062915107e-05, "loss": 0.6087, "step": 3281, "task_loss": 1.672621726989746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6326103806495667, "epoch": 2.77, "learning_rate": 3.0183552711025242e-05, "loss": 0.6314, "step": 3282, "task_loss": 1.5971965789794922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24506723880767822, "epoch": 2.77, "learning_rate": 3.017751479289941e-05, "loss": 0.4668, "step": 3283, "task_loss": 0.27523326873779297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.370430052280426, "epoch": 2.78, "learning_rate": 3.017147687477358e-05, "loss": 0.5453, "step": 3284, "task_loss": 0.6800107955932617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4174608290195465, "epoch": 2.78, "learning_rate": 3.016543895664775e-05, "loss": 0.5374, "step": 3285, "task_loss": 0.5336780548095703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8511661887168884, "epoch": 2.78, "learning_rate": 3.0159401038521917e-05, "loss": 0.6895, "step": 3286, "task_loss": 0.8207129836082458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5455493927001953, "epoch": 2.78, "learning_rate": 3.0153363120396088e-05, "loss": 0.4345, "step": 3287, "task_loss": 0.6961100101470947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4662633538246155, "epoch": 2.78, "learning_rate": 3.014732520227026e-05, "loss": 0.5689, "step": 3288, "task_loss": 0.8812435269355774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28566262125968933, "epoch": 2.78, "learning_rate": 3.0141287284144426e-05, "loss": 0.5337, "step": 3289, "task_loss": 0.10772304236888885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6161923408508301, "epoch": 2.78, "learning_rate": 3.0135249366018596e-05, "loss": 0.5149, "step": 3290, "task_loss": 0.7003945112228394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34146857261657715, "epoch": 2.78, "learning_rate": 3.012921144789277e-05, "loss": 0.5204, "step": 3291, "task_loss": 0.2180652618408203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6697998046875, "epoch": 2.78, "learning_rate": 3.012317352976694e-05, "loss": 0.5201, "step": 3292, "task_loss": 0.7547323703765869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32966387271881104, "epoch": 2.78, "learning_rate": 3.0117135611641108e-05, "loss": 0.501, "step": 3293, "task_loss": 0.7424336075782776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5144477486610413, "epoch": 2.78, "learning_rate": 3.011109769351528e-05, "loss": 0.5024, "step": 3294, "task_loss": 0.4240069091320038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3840091824531555, "epoch": 2.78, "learning_rate": 3.010505977538945e-05, "loss": 0.4548, "step": 3295, "task_loss": 0.8876148462295532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47645482420921326, "epoch": 2.79, "learning_rate": 3.0099021857263616e-05, "loss": 0.5094, "step": 3296, "task_loss": 0.6505768895149231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8579526543617249, "epoch": 2.79, "learning_rate": 3.0092983939137787e-05, "loss": 0.5902, "step": 3297, "task_loss": 1.5455435514450073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49256381392478943, "epoch": 2.79, "learning_rate": 3.0086946021011957e-05, "loss": 0.4913, "step": 3298, "task_loss": 0.7729485630989075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.339240163564682, "epoch": 2.79, "learning_rate": 3.0080908102886125e-05, "loss": 0.3968, "step": 3299, "task_loss": 0.14011752605438232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2377348244190216, "epoch": 2.79, "learning_rate": 3.0074870184760295e-05, "loss": 0.5494, "step": 3300, "task_loss": 1.087864875793457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29552632570266724, "epoch": 2.79, "learning_rate": 3.0068832266634466e-05, "loss": 0.5378, "step": 3301, "task_loss": 0.7623904943466187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5100905895233154, "epoch": 2.79, "learning_rate": 3.006279434850864e-05, "loss": 0.5553, "step": 3302, "task_loss": 0.3118247985839844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48813021183013916, "epoch": 2.79, "learning_rate": 3.0056756430382803e-05, "loss": 0.6567, "step": 3303, "task_loss": 0.7691986560821533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3899998962879181, "epoch": 2.79, "learning_rate": 3.0050718512256974e-05, "loss": 0.6205, "step": 3304, "task_loss": 0.9454507827758789 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32573437690734863, "epoch": 2.79, "learning_rate": 3.0044680594131148e-05, "loss": 0.558, "step": 3305, "task_loss": 0.6497354507446289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36382603645324707, "epoch": 2.79, "learning_rate": 3.0038642676005312e-05, "loss": 0.3775, "step": 3306, "task_loss": 0.5952476263046265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.316776305437088, "epoch": 2.79, "learning_rate": 3.0032604757879486e-05, "loss": 0.4253, "step": 3307, "task_loss": 0.5110852122306824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3752826452255249, "epoch": 2.8, "learning_rate": 3.0026566839753656e-05, "loss": 0.4737, "step": 3308, "task_loss": 0.7223020792007446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4197548031806946, "epoch": 2.8, "learning_rate": 3.002052892162782e-05, "loss": 0.5612, "step": 3309, "task_loss": 0.7341300845146179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6826184391975403, "epoch": 2.8, "learning_rate": 3.0014491003501994e-05, "loss": 0.6354, "step": 3310, "task_loss": 1.5742064714431763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5928375720977783, "epoch": 2.8, "learning_rate": 3.0008453085376165e-05, "loss": 0.5717, "step": 3311, "task_loss": 1.2189887762069702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5318623781204224, "epoch": 2.8, "learning_rate": 3.0002415167250335e-05, "loss": 0.52, "step": 3312, "task_loss": 0.37981081008911133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3812848925590515, "epoch": 2.8, "learning_rate": 2.9996377249124502e-05, "loss": 0.3824, "step": 3313, "task_loss": 0.30498626828193665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7374739646911621, "epoch": 2.8, "learning_rate": 2.9990339330998673e-05, "loss": 0.7015, "step": 3314, "task_loss": 1.2527122497558594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34369173645973206, "epoch": 2.8, "learning_rate": 2.9984301412872844e-05, "loss": 0.4837, "step": 3315, "task_loss": 0.06839951127767563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6777631044387817, "epoch": 2.8, "learning_rate": 2.997826349474701e-05, "loss": 0.5708, "step": 3316, "task_loss": 0.5228925943374634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4524681568145752, "epoch": 2.8, "learning_rate": 2.997222557662118e-05, "loss": 0.4309, "step": 3317, "task_loss": 0.7054744958877563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4492039680480957, "epoch": 2.8, "learning_rate": 2.9966187658495355e-05, "loss": 0.4541, "step": 3318, "task_loss": 0.34380897879600525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41480666399002075, "epoch": 2.81, "learning_rate": 2.996014974036952e-05, "loss": 0.4557, "step": 3319, "task_loss": 0.694098711013794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8656102418899536, "epoch": 2.81, "learning_rate": 2.995411182224369e-05, "loss": 0.5019, "step": 3320, "task_loss": 0.7577204704284668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40808191895484924, "epoch": 2.81, "learning_rate": 2.9948073904117864e-05, "loss": 0.4982, "step": 3321, "task_loss": 0.740150511264801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1394113153219223, "epoch": 2.81, "learning_rate": 2.9942035985992034e-05, "loss": 0.3519, "step": 3322, "task_loss": 0.08033248037099838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5206465721130371, "epoch": 2.81, "learning_rate": 2.99359980678662e-05, "loss": 0.4373, "step": 3323, "task_loss": 0.4081490635871887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3536440134048462, "epoch": 2.81, "learning_rate": 2.9929960149740372e-05, "loss": 0.499, "step": 3324, "task_loss": 0.6050533652305603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6978265047073364, "epoch": 2.81, "learning_rate": 2.9923922231614543e-05, "loss": 0.6031, "step": 3325, "task_loss": 0.3567456305027008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4467445909976959, "epoch": 2.81, "learning_rate": 2.991788431348871e-05, "loss": 0.4264, "step": 3326, "task_loss": 0.4254438877105713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1926264613866806, "epoch": 2.81, "learning_rate": 2.991184639536288e-05, "loss": 0.4119, "step": 3327, "task_loss": 0.4274410903453827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5410107374191284, "epoch": 2.81, "learning_rate": 2.990580847723705e-05, "loss": 0.449, "step": 3328, "task_loss": 0.6049813032150269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35825493931770325, "epoch": 2.81, "learning_rate": 2.9899770559111218e-05, "loss": 0.5391, "step": 3329, "task_loss": 1.1882632970809937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5789589881896973, "epoch": 2.81, "learning_rate": 2.989373264098539e-05, "loss": 0.6636, "step": 3330, "task_loss": 0.39743661880493164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33827465772628784, "epoch": 2.82, "learning_rate": 2.988769472285956e-05, "loss": 0.6361, "step": 3331, "task_loss": 0.7390314936637878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5812557935714722, "epoch": 2.82, "learning_rate": 2.9881656804733733e-05, "loss": 0.458, "step": 3332, "task_loss": 0.9416660666465759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6008345484733582, "epoch": 2.82, "learning_rate": 2.9875618886607897e-05, "loss": 0.5736, "step": 3333, "task_loss": 0.7933235168457031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.699802041053772, "epoch": 2.82, "learning_rate": 2.986958096848207e-05, "loss": 0.5297, "step": 3334, "task_loss": 0.8688437342643738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5347750186920166, "epoch": 2.82, "learning_rate": 2.986354305035624e-05, "loss": 0.5124, "step": 3335, "task_loss": 0.708240807056427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21185263991355896, "epoch": 2.82, "learning_rate": 2.9857505132230405e-05, "loss": 0.3583, "step": 3336, "task_loss": 0.059961117804050446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28819984197616577, "epoch": 2.82, "learning_rate": 2.985146721410458e-05, "loss": 0.5852, "step": 3337, "task_loss": 0.3330778479576111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7903405427932739, "epoch": 2.82, "learning_rate": 2.984542929597875e-05, "loss": 0.8405, "step": 3338, "task_loss": 0.65412437915802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3387461006641388, "epoch": 2.82, "learning_rate": 2.9839391377852917e-05, "loss": 0.493, "step": 3339, "task_loss": 0.4016391336917877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3633604049682617, "epoch": 2.82, "learning_rate": 2.9833353459727088e-05, "loss": 0.3648, "step": 3340, "task_loss": 0.34191620349884033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2613677382469177, "epoch": 2.82, "learning_rate": 2.9827315541601258e-05, "loss": 0.5654, "step": 3341, "task_loss": 0.2759416401386261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43032172322273254, "epoch": 2.82, "learning_rate": 2.982127762347543e-05, "loss": 0.5225, "step": 3342, "task_loss": 0.6004908680915833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9102005958557129, "epoch": 2.83, "learning_rate": 2.9815239705349596e-05, "loss": 0.558, "step": 3343, "task_loss": 0.4773593246936798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23707905411720276, "epoch": 2.83, "learning_rate": 2.9809201787223766e-05, "loss": 0.4716, "step": 3344, "task_loss": 0.2989025115966797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.635972797870636, "epoch": 2.83, "learning_rate": 2.9803163869097937e-05, "loss": 0.5469, "step": 3345, "task_loss": 0.35803526639938354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5867385864257812, "epoch": 2.83, "learning_rate": 2.9797125950972104e-05, "loss": 0.5599, "step": 3346, "task_loss": 0.3663881719112396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39361220598220825, "epoch": 2.83, "learning_rate": 2.9791088032846275e-05, "loss": 0.3782, "step": 3347, "task_loss": 0.35756534337997437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6390718221664429, "epoch": 2.83, "learning_rate": 2.978505011472045e-05, "loss": 0.4893, "step": 3348, "task_loss": 1.1653854846954346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3367272913455963, "epoch": 2.83, "learning_rate": 2.9779012196594612e-05, "loss": 0.4829, "step": 3349, "task_loss": 0.9398810267448425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5996885299682617, "epoch": 2.83, "learning_rate": 2.9772974278468786e-05, "loss": 0.456, "step": 3350, "task_loss": 1.0904005765914917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4842492938041687, "epoch": 2.83, "learning_rate": 2.9766936360342957e-05, "loss": 0.5633, "step": 3351, "task_loss": 1.1759099960327148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6308097839355469, "epoch": 2.83, "learning_rate": 2.976089844221712e-05, "loss": 0.5543, "step": 3352, "task_loss": 1.3089755773544312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6315435171127319, "epoch": 2.83, "learning_rate": 2.9754860524091295e-05, "loss": 0.4666, "step": 3353, "task_loss": 0.3219771981239319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5553852319717407, "epoch": 2.83, "learning_rate": 2.9748822605965465e-05, "loss": 0.5428, "step": 3354, "task_loss": 1.1592820882797241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8450289964675903, "epoch": 2.84, "learning_rate": 2.9742784687839636e-05, "loss": 0.7189, "step": 3355, "task_loss": 1.3294743299484253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48444855213165283, "epoch": 2.84, "learning_rate": 2.9736746769713803e-05, "loss": 0.5015, "step": 3356, "task_loss": 0.5706074237823486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.401547908782959, "epoch": 2.84, "learning_rate": 2.9730708851587974e-05, "loss": 0.5203, "step": 3357, "task_loss": 0.3841138482093811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.546968936920166, "epoch": 2.84, "learning_rate": 2.9724670933462144e-05, "loss": 0.4041, "step": 3358, "task_loss": 0.609691858291626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4293258786201477, "epoch": 2.84, "learning_rate": 2.971863301533631e-05, "loss": 0.6382, "step": 3359, "task_loss": 1.2691240310668945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5758242607116699, "epoch": 2.84, "learning_rate": 2.9712595097210482e-05, "loss": 0.5487, "step": 3360, "task_loss": 0.6721800565719604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36187300086021423, "epoch": 2.84, "learning_rate": 2.9706557179084653e-05, "loss": 0.5756, "step": 3361, "task_loss": 0.3215368092060089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2915619909763336, "epoch": 2.84, "learning_rate": 2.970051926095882e-05, "loss": 0.4908, "step": 3362, "task_loss": 0.45061415433883667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7231187224388123, "epoch": 2.84, "learning_rate": 2.969448134283299e-05, "loss": 0.6107, "step": 3363, "task_loss": 0.8878286480903625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5620583891868591, "epoch": 2.84, "learning_rate": 2.9688443424707164e-05, "loss": 0.7371, "step": 3364, "task_loss": 0.9718077182769775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43780583143234253, "epoch": 2.84, "learning_rate": 2.9682405506581335e-05, "loss": 0.5791, "step": 3365, "task_loss": 0.9148914813995361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7152829170227051, "epoch": 2.84, "learning_rate": 2.9676367588455502e-05, "loss": 0.5878, "step": 3366, "task_loss": 1.6183228492736816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5010735988616943, "epoch": 2.85, "learning_rate": 2.9670329670329673e-05, "loss": 0.6907, "step": 3367, "task_loss": 0.9151827692985535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3378761112689972, "epoch": 2.85, "learning_rate": 2.9664291752203843e-05, "loss": 0.6056, "step": 3368, "task_loss": 0.9268571138381958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3507848083972931, "epoch": 2.85, "learning_rate": 2.965825383407801e-05, "loss": 0.5381, "step": 3369, "task_loss": 0.1474369317293167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7318501472473145, "epoch": 2.85, "learning_rate": 2.965221591595218e-05, "loss": 0.5222, "step": 3370, "task_loss": 0.9303399324417114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6295871734619141, "epoch": 2.85, "learning_rate": 2.964617799782635e-05, "loss": 0.6235, "step": 3371, "task_loss": 0.9008583426475525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5412274599075317, "epoch": 2.85, "learning_rate": 2.964014007970052e-05, "loss": 0.4859, "step": 3372, "task_loss": 0.26114463806152344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36834073066711426, "epoch": 2.85, "learning_rate": 2.963410216157469e-05, "loss": 0.3182, "step": 3373, "task_loss": 0.4569679796695709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4925301671028137, "epoch": 2.85, "learning_rate": 2.962806424344886e-05, "loss": 0.4681, "step": 3374, "task_loss": 0.8180443644523621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5548438429832458, "epoch": 2.85, "learning_rate": 2.9622026325323034e-05, "loss": 0.4742, "step": 3375, "task_loss": 0.3496617078781128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5645369291305542, "epoch": 2.85, "learning_rate": 2.9615988407197198e-05, "loss": 0.5775, "step": 3376, "task_loss": 0.9046251773834229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5389723181724548, "epoch": 2.85, "learning_rate": 2.9609950489071368e-05, "loss": 0.4996, "step": 3377, "task_loss": 0.6590587496757507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41198039054870605, "epoch": 2.85, "learning_rate": 2.9603912570945542e-05, "loss": 0.4701, "step": 3378, "task_loss": 0.5668193697929382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4884001612663269, "epoch": 2.86, "learning_rate": 2.9597874652819706e-05, "loss": 0.5373, "step": 3379, "task_loss": 0.8821157217025757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26510918140411377, "epoch": 2.86, "learning_rate": 2.959183673469388e-05, "loss": 0.5081, "step": 3380, "task_loss": 0.2882504463195801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27601781487464905, "epoch": 2.86, "learning_rate": 2.958579881656805e-05, "loss": 0.4091, "step": 3381, "task_loss": 0.1617375910282135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.807146430015564, "epoch": 2.86, "learning_rate": 2.9579760898442214e-05, "loss": 0.537, "step": 3382, "task_loss": 0.5739713907241821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4145191013813019, "epoch": 2.86, "learning_rate": 2.9573722980316388e-05, "loss": 0.5543, "step": 3383, "task_loss": 0.20358826220035553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5025771260261536, "epoch": 2.86, "learning_rate": 2.956768506219056e-05, "loss": 0.5066, "step": 3384, "task_loss": 0.7737794518470764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27747759222984314, "epoch": 2.86, "learning_rate": 2.956164714406473e-05, "loss": 0.6617, "step": 3385, "task_loss": 0.7166411280632019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5603737831115723, "epoch": 2.86, "learning_rate": 2.9555609225938897e-05, "loss": 0.5572, "step": 3386, "task_loss": 0.7936754822731018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9908336400985718, "epoch": 2.86, "learning_rate": 2.9549571307813067e-05, "loss": 0.7739, "step": 3387, "task_loss": 1.7227392196655273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4118739068508148, "epoch": 2.86, "learning_rate": 2.9543533389687238e-05, "loss": 0.6818, "step": 3388, "task_loss": 0.8992160558700562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5726567506790161, "epoch": 2.86, "learning_rate": 2.9537495471561405e-05, "loss": 0.5277, "step": 3389, "task_loss": 0.49561524391174316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5079250335693359, "epoch": 2.87, "learning_rate": 2.9531457553435575e-05, "loss": 0.466, "step": 3390, "task_loss": 0.8577269911766052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5130441188812256, "epoch": 2.87, "learning_rate": 2.952541963530975e-05, "loss": 0.4351, "step": 3391, "task_loss": 0.7880834341049194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4451906681060791, "epoch": 2.87, "learning_rate": 2.9519381717183913e-05, "loss": 0.6401, "step": 3392, "task_loss": 0.6278903484344482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7006067037582397, "epoch": 2.87, "learning_rate": 2.9513343799058084e-05, "loss": 0.6106, "step": 3393, "task_loss": 0.7324469089508057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32181811332702637, "epoch": 2.87, "learning_rate": 2.9507305880932258e-05, "loss": 0.3897, "step": 3394, "task_loss": 0.31782805919647217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35038068890571594, "epoch": 2.87, "learning_rate": 2.9501267962806428e-05, "loss": 0.4719, "step": 3395, "task_loss": 0.359840452671051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20895010232925415, "epoch": 2.87, "learning_rate": 2.9495230044680595e-05, "loss": 0.4637, "step": 3396, "task_loss": 0.26630669832229614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3660549223423004, "epoch": 2.87, "learning_rate": 2.9489192126554766e-05, "loss": 0.4393, "step": 3397, "task_loss": 0.246572345495224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3702227473258972, "epoch": 2.87, "learning_rate": 2.9483154208428937e-05, "loss": 0.4242, "step": 3398, "task_loss": 0.38330912590026855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4709116816520691, "epoch": 2.87, "learning_rate": 2.9477116290303104e-05, "loss": 0.517, "step": 3399, "task_loss": 0.8910549283027649 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3536267876625061, "epoch": 2.87, "learning_rate": 2.9471078372177274e-05, "loss": 0.4154, "step": 3400, "task_loss": 0.8119101524353027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4062059819698334, "epoch": 2.87, "learning_rate": 2.9465040454051445e-05, "loss": 0.4469, "step": 3401, "task_loss": 0.4545731544494629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5117408037185669, "epoch": 2.88, "learning_rate": 2.9459002535925612e-05, "loss": 0.5504, "step": 3402, "task_loss": 0.443561315536499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4815858006477356, "epoch": 2.88, "learning_rate": 2.9452964617799783e-05, "loss": 0.6002, "step": 3403, "task_loss": 0.7268723845481873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8348985314369202, "epoch": 2.88, "learning_rate": 2.9446926699673953e-05, "loss": 0.6244, "step": 3404, "task_loss": 1.6299705505371094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5485211610794067, "epoch": 2.88, "learning_rate": 2.9440888781548127e-05, "loss": 0.5215, "step": 3405, "task_loss": 0.432005912065506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6411656737327576, "epoch": 2.88, "learning_rate": 2.943485086342229e-05, "loss": 0.5445, "step": 3406, "task_loss": 0.5530977845191956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5392565131187439, "epoch": 2.88, "learning_rate": 2.9428812945296465e-05, "loss": 0.54, "step": 3407, "task_loss": 0.8343731760978699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6693518757820129, "epoch": 2.88, "learning_rate": 2.9422775027170636e-05, "loss": 0.5746, "step": 3408, "task_loss": 0.5247251987457275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6154510974884033, "epoch": 2.88, "learning_rate": 2.94167371090448e-05, "loss": 0.475, "step": 3409, "task_loss": 0.6181241869926453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41832855343818665, "epoch": 2.88, "learning_rate": 2.9410699190918973e-05, "loss": 0.4836, "step": 3410, "task_loss": 0.46420353651046753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3494272828102112, "epoch": 2.88, "learning_rate": 2.9404661272793144e-05, "loss": 0.4779, "step": 3411, "task_loss": 1.20016348361969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7249889373779297, "epoch": 2.88, "learning_rate": 2.939862335466731e-05, "loss": 0.5814, "step": 3412, "task_loss": 0.7657532095909119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44126659631729126, "epoch": 2.88, "learning_rate": 2.939258543654148e-05, "loss": 0.6006, "step": 3413, "task_loss": 0.7880092263221741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41105493903160095, "epoch": 2.89, "learning_rate": 2.9386547518415652e-05, "loss": 0.678, "step": 3414, "task_loss": 1.3146309852600098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6108083724975586, "epoch": 2.89, "learning_rate": 2.9380509600289823e-05, "loss": 0.5992, "step": 3415, "task_loss": 1.2487716674804688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5251747369766235, "epoch": 2.89, "learning_rate": 2.937447168216399e-05, "loss": 0.4782, "step": 3416, "task_loss": 1.0294334888458252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47672367095947266, "epoch": 2.89, "learning_rate": 2.936843376403816e-05, "loss": 0.6145, "step": 3417, "task_loss": 1.1981189250946045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5686930418014526, "epoch": 2.89, "learning_rate": 2.936239584591233e-05, "loss": 0.5548, "step": 3418, "task_loss": 0.6013724207878113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4096831679344177, "epoch": 2.89, "learning_rate": 2.9356357927786498e-05, "loss": 0.4275, "step": 3419, "task_loss": 0.45757290720939636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.610349178314209, "epoch": 2.89, "learning_rate": 2.935032000966067e-05, "loss": 0.5705, "step": 3420, "task_loss": 1.1569390296936035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4579598307609558, "epoch": 2.89, "learning_rate": 2.9344282091534843e-05, "loss": 0.4844, "step": 3421, "task_loss": 1.298506498336792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6023845672607422, "epoch": 2.89, "learning_rate": 2.9338244173409007e-05, "loss": 0.553, "step": 3422, "task_loss": 1.0986379384994507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19389712810516357, "epoch": 2.89, "learning_rate": 2.933220625528318e-05, "loss": 0.4355, "step": 3423, "task_loss": 0.42161422967910767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47808700799942017, "epoch": 2.89, "learning_rate": 2.932616833715735e-05, "loss": 0.4696, "step": 3424, "task_loss": 0.21199139952659607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5045957565307617, "epoch": 2.89, "learning_rate": 2.932013041903152e-05, "loss": 0.4497, "step": 3425, "task_loss": 1.485680103302002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3520262837409973, "epoch": 2.9, "learning_rate": 2.931409250090569e-05, "loss": 0.4794, "step": 3426, "task_loss": 0.9181094169616699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3392193615436554, "epoch": 2.9, "learning_rate": 2.930805458277986e-05, "loss": 0.6468, "step": 3427, "task_loss": 0.6281761527061462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6498593688011169, "epoch": 2.9, "learning_rate": 2.930201666465403e-05, "loss": 0.5335, "step": 3428, "task_loss": 0.5243313908576965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5686932802200317, "epoch": 2.9, "learning_rate": 2.9295978746528197e-05, "loss": 0.4493, "step": 3429, "task_loss": 0.6182946562767029 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4842614531517029, "epoch": 2.9, "learning_rate": 2.9289940828402368e-05, "loss": 0.466, "step": 3430, "task_loss": 0.7677356004714966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20397941768169403, "epoch": 2.9, "learning_rate": 2.928390291027654e-05, "loss": 0.3199, "step": 3431, "task_loss": 0.25134992599487305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41695231199264526, "epoch": 2.9, "learning_rate": 2.9277864992150706e-05, "loss": 0.3446, "step": 3432, "task_loss": 1.0171159505844116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5654134750366211, "epoch": 2.9, "learning_rate": 2.9271827074024876e-05, "loss": 0.4823, "step": 3433, "task_loss": 0.3269893527030945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5508276224136353, "epoch": 2.9, "learning_rate": 2.9265789155899047e-05, "loss": 0.3746, "step": 3434, "task_loss": 0.8636150360107422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5234626531600952, "epoch": 2.9, "learning_rate": 2.925975123777322e-05, "loss": 0.6542, "step": 3435, "task_loss": 1.036733865737915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6407731771469116, "epoch": 2.9, "learning_rate": 2.9253713319647384e-05, "loss": 0.458, "step": 3436, "task_loss": 1.086212158203125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4262845516204834, "epoch": 2.9, "learning_rate": 2.924767540152156e-05, "loss": 0.3683, "step": 3437, "task_loss": 0.3807339370250702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5487103462219238, "epoch": 2.91, "learning_rate": 2.924163748339573e-05, "loss": 0.5019, "step": 3438, "task_loss": 0.24366691708564758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7898529171943665, "epoch": 2.91, "learning_rate": 2.9235599565269893e-05, "loss": 0.5184, "step": 3439, "task_loss": 1.5392311811447144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5060300827026367, "epoch": 2.91, "learning_rate": 2.9229561647144067e-05, "loss": 0.4969, "step": 3440, "task_loss": 0.8484686017036438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7074692249298096, "epoch": 2.91, "learning_rate": 2.9223523729018237e-05, "loss": 0.7183, "step": 3441, "task_loss": 0.8704428672790527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3681033253669739, "epoch": 2.91, "learning_rate": 2.9217485810892404e-05, "loss": 0.6396, "step": 3442, "task_loss": 0.24179638922214508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5752744078636169, "epoch": 2.91, "learning_rate": 2.9211447892766575e-05, "loss": 0.5775, "step": 3443, "task_loss": 0.4481542408466339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28577113151550293, "epoch": 2.91, "learning_rate": 2.9205409974640746e-05, "loss": 0.4721, "step": 3444, "task_loss": 0.16408294439315796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4127446413040161, "epoch": 2.91, "learning_rate": 2.9199372056514916e-05, "loss": 0.5529, "step": 3445, "task_loss": 1.1957964897155762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5066368579864502, "epoch": 2.91, "learning_rate": 2.9193334138389083e-05, "loss": 0.5108, "step": 3446, "task_loss": 0.2622147500514984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.564972996711731, "epoch": 2.91, "learning_rate": 2.9187296220263254e-05, "loss": 0.4442, "step": 3447, "task_loss": 1.021462082862854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5076010227203369, "epoch": 2.91, "learning_rate": 2.9181258302137428e-05, "loss": 0.4924, "step": 3448, "task_loss": 0.7071394920349121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.54072105884552, "epoch": 2.91, "learning_rate": 2.917522038401159e-05, "loss": 0.4398, "step": 3449, "task_loss": 1.1468206644058228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5838050842285156, "epoch": 2.92, "learning_rate": 2.9169182465885762e-05, "loss": 0.5329, "step": 3450, "task_loss": 0.3193611204624176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39292919635772705, "epoch": 2.92, "learning_rate": 2.9163144547759936e-05, "loss": 0.4709, "step": 3451, "task_loss": 1.3319063186645508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3642688989639282, "epoch": 2.92, "learning_rate": 2.91571066296341e-05, "loss": 0.4763, "step": 3452, "task_loss": 0.2878212034702301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19050927460193634, "epoch": 2.92, "learning_rate": 2.9151068711508274e-05, "loss": 0.4916, "step": 3453, "task_loss": 0.611759603023529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5157721042633057, "epoch": 2.92, "learning_rate": 2.9145030793382445e-05, "loss": 0.5459, "step": 3454, "task_loss": 0.8579574823379517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5713892579078674, "epoch": 2.92, "learning_rate": 2.9138992875256615e-05, "loss": 0.5749, "step": 3455, "task_loss": 2.6081035137176514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3976712226867676, "epoch": 2.92, "learning_rate": 2.9132954957130782e-05, "loss": 0.5334, "step": 3456, "task_loss": 1.1766719818115234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40427297353744507, "epoch": 2.92, "learning_rate": 2.9126917039004953e-05, "loss": 0.5201, "step": 3457, "task_loss": 0.7822862863540649 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5334845781326294, "epoch": 2.92, "learning_rate": 2.9120879120879123e-05, "loss": 0.4598, "step": 3458, "task_loss": 0.8179314136505127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6641910076141357, "epoch": 2.92, "learning_rate": 2.911484120275329e-05, "loss": 0.3888, "step": 3459, "task_loss": 0.398189514875412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.532219409942627, "epoch": 2.92, "learning_rate": 2.910880328462746e-05, "loss": 0.4367, "step": 3460, "task_loss": 0.6790198087692261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5220595002174377, "epoch": 2.93, "learning_rate": 2.9102765366501632e-05, "loss": 0.4601, "step": 3461, "task_loss": 0.5087848901748657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4570496082305908, "epoch": 2.93, "learning_rate": 2.90967274483758e-05, "loss": 0.4487, "step": 3462, "task_loss": 1.7430305480957031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3868756592273712, "epoch": 2.93, "learning_rate": 2.909068953024997e-05, "loss": 0.4643, "step": 3463, "task_loss": 1.5979385375976562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5813339948654175, "epoch": 2.93, "learning_rate": 2.9084651612124143e-05, "loss": 0.568, "step": 3464, "task_loss": 0.3921363353729248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6724848747253418, "epoch": 2.93, "learning_rate": 2.9078613693998314e-05, "loss": 0.486, "step": 3465, "task_loss": 0.6430695056915283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47483354806900024, "epoch": 2.93, "learning_rate": 2.9072575775872478e-05, "loss": 0.5791, "step": 3466, "task_loss": 0.4056547284126282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42587754130363464, "epoch": 2.93, "learning_rate": 2.9066537857746652e-05, "loss": 0.4609, "step": 3467, "task_loss": 0.8009828925132751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6594436764717102, "epoch": 2.93, "learning_rate": 2.9060499939620822e-05, "loss": 0.5086, "step": 3468, "task_loss": 0.47763320803642273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6408342719078064, "epoch": 2.93, "learning_rate": 2.905446202149499e-05, "loss": 0.5514, "step": 3469, "task_loss": 1.300635814666748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8068261742591858, "epoch": 2.93, "learning_rate": 2.904842410336916e-05, "loss": 0.6271, "step": 3470, "task_loss": 1.4490320682525635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.634763240814209, "epoch": 2.93, "learning_rate": 2.904238618524333e-05, "loss": 0.594, "step": 3471, "task_loss": 0.601643443107605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28066402673721313, "epoch": 2.93, "learning_rate": 2.9036348267117498e-05, "loss": 0.5915, "step": 3472, "task_loss": 0.3154022991657257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5533572435379028, "epoch": 2.94, "learning_rate": 2.903031034899167e-05, "loss": 0.527, "step": 3473, "task_loss": 0.5658393502235413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3488887846469879, "epoch": 2.94, "learning_rate": 2.902427243086584e-05, "loss": 0.5368, "step": 3474, "task_loss": 0.9310674667358398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5667470693588257, "epoch": 2.94, "learning_rate": 2.901823451274001e-05, "loss": 0.6257, "step": 3475, "task_loss": 0.4527128040790558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39881351590156555, "epoch": 2.94, "learning_rate": 2.9012196594614177e-05, "loss": 0.4911, "step": 3476, "task_loss": 0.495433509349823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6228632926940918, "epoch": 2.94, "learning_rate": 2.9006158676488347e-05, "loss": 0.4598, "step": 3477, "task_loss": 0.5286601185798645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41025713086128235, "epoch": 2.94, "learning_rate": 2.900012075836252e-05, "loss": 0.5866, "step": 3478, "task_loss": 0.4725244641304016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.424943745136261, "epoch": 2.94, "learning_rate": 2.8994082840236685e-05, "loss": 0.5107, "step": 3479, "task_loss": 0.9427638053894043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7621676325798035, "epoch": 2.94, "learning_rate": 2.898804492211086e-05, "loss": 0.4535, "step": 3480, "task_loss": 0.981867253780365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5664827823638916, "epoch": 2.94, "learning_rate": 2.898200700398503e-05, "loss": 0.6907, "step": 3481, "task_loss": 1.2118258476257324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8833428025245667, "epoch": 2.94, "learning_rate": 2.8975969085859193e-05, "loss": 0.6235, "step": 3482, "task_loss": 0.663048505783081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31078314781188965, "epoch": 2.94, "learning_rate": 2.8969931167733367e-05, "loss": 0.4009, "step": 3483, "task_loss": 0.15618249773979187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48118141293525696, "epoch": 2.94, "learning_rate": 2.8963893249607538e-05, "loss": 0.4821, "step": 3484, "task_loss": 0.5979284644126892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.678946316242218, "epoch": 2.95, "learning_rate": 2.895785533148171e-05, "loss": 0.5054, "step": 3485, "task_loss": 1.2108368873596191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6566177606582642, "epoch": 2.95, "learning_rate": 2.8951817413355876e-05, "loss": 0.5466, "step": 3486, "task_loss": 0.9393370151519775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.822662353515625, "epoch": 2.95, "learning_rate": 2.8945779495230046e-05, "loss": 0.5581, "step": 3487, "task_loss": 0.9881158471107483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3688987195491791, "epoch": 2.95, "learning_rate": 2.8939741577104217e-05, "loss": 0.5029, "step": 3488, "task_loss": 0.5266862511634827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5146064162254333, "epoch": 2.95, "learning_rate": 2.8933703658978384e-05, "loss": 0.5404, "step": 3489, "task_loss": 1.4808974266052246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.447113037109375, "epoch": 2.95, "learning_rate": 2.8927665740852555e-05, "loss": 0.4084, "step": 3490, "task_loss": 0.33805495500564575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46426868438720703, "epoch": 2.95, "learning_rate": 2.8921627822726725e-05, "loss": 0.4831, "step": 3491, "task_loss": 0.39682838320732117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46143612265586853, "epoch": 2.95, "learning_rate": 2.8915589904600892e-05, "loss": 0.5389, "step": 3492, "task_loss": 0.7432441711425781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.363934725522995, "epoch": 2.95, "learning_rate": 2.8909551986475063e-05, "loss": 0.432, "step": 3493, "task_loss": 0.4092792272567749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5947434902191162, "epoch": 2.95, "learning_rate": 2.8903514068349237e-05, "loss": 0.5868, "step": 3494, "task_loss": 0.9408076405525208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3117849826812744, "epoch": 2.95, "learning_rate": 2.8897476150223407e-05, "loss": 0.4162, "step": 3495, "task_loss": 0.7537088990211487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4734795391559601, "epoch": 2.95, "learning_rate": 2.889143823209757e-05, "loss": 0.5574, "step": 3496, "task_loss": 1.3328896760940552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7835291028022766, "epoch": 2.96, "learning_rate": 2.8885400313971745e-05, "loss": 0.5317, "step": 3497, "task_loss": 0.9472994804382324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41001492738723755, "epoch": 2.96, "learning_rate": 2.8879362395845916e-05, "loss": 0.4143, "step": 3498, "task_loss": 0.6593810319900513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47645139694213867, "epoch": 2.96, "learning_rate": 2.8873324477720083e-05, "loss": 0.6385, "step": 3499, "task_loss": 0.1965888887643814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5061509609222412, "epoch": 2.96, "learning_rate": 2.8867286559594254e-05, "loss": 0.4013, "step": 3500, "task_loss": 0.9322546124458313 }, { "epoch": 2.96, "eval_accuracy": 0.9043960396039604, "eval_loss": 0.31759944558143616, "eval_runtime": 320.5213, "eval_samples_per_second": 78.778, "eval_steps_per_second": 0.618, "step": 3500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4696592688560486, "epoch": 2.96, "learning_rate": 2.8861248641468424e-05, "loss": 0.5604, "step": 3501, "task_loss": 1.0248576402664185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6056488156318665, "epoch": 2.96, "learning_rate": 2.885521072334259e-05, "loss": 0.5984, "step": 3502, "task_loss": 0.781438410282135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7048083543777466, "epoch": 2.96, "learning_rate": 2.8849172805216762e-05, "loss": 0.4217, "step": 3503, "task_loss": 0.10842147469520569 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6219778656959534, "epoch": 2.96, "learning_rate": 2.8843134887090932e-05, "loss": 0.4817, "step": 3504, "task_loss": 1.5508651733398438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3820396959781647, "epoch": 2.96, "learning_rate": 2.8837096968965106e-05, "loss": 0.3763, "step": 3505, "task_loss": 1.8252805471420288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.76343834400177, "epoch": 2.96, "learning_rate": 2.883105905083927e-05, "loss": 0.5318, "step": 3506, "task_loss": 1.1847989559173584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4680008292198181, "epoch": 2.96, "learning_rate": 2.882502113271344e-05, "loss": 0.6533, "step": 3507, "task_loss": 0.9814065098762512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6531151533126831, "epoch": 2.96, "learning_rate": 2.8818983214587615e-05, "loss": 0.5039, "step": 3508, "task_loss": 0.33981743454933167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8484349250793457, "epoch": 2.97, "learning_rate": 2.881294529646178e-05, "loss": 0.5871, "step": 3509, "task_loss": 1.052997350692749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6698601841926575, "epoch": 2.97, "learning_rate": 2.8806907378335952e-05, "loss": 0.4782, "step": 3510, "task_loss": 2.121389865875244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4575510323047638, "epoch": 2.97, "learning_rate": 2.8800869460210123e-05, "loss": 0.7633, "step": 3511, "task_loss": 1.5436875820159912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37561875581741333, "epoch": 2.97, "learning_rate": 2.8794831542084287e-05, "loss": 0.5382, "step": 3512, "task_loss": 0.7575316429138184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5068179368972778, "epoch": 2.97, "learning_rate": 2.878879362395846e-05, "loss": 0.5291, "step": 3513, "task_loss": 1.573523998260498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.355526328086853, "epoch": 2.97, "learning_rate": 2.878275570583263e-05, "loss": 0.4039, "step": 3514, "task_loss": 0.10360342264175415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6029901504516602, "epoch": 2.97, "learning_rate": 2.8776717787706802e-05, "loss": 0.5695, "step": 3515, "task_loss": 1.2131675481796265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4059915244579315, "epoch": 2.97, "learning_rate": 2.877067986958097e-05, "loss": 0.4925, "step": 3516, "task_loss": 0.4989473521709442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.445908784866333, "epoch": 2.97, "learning_rate": 2.876464195145514e-05, "loss": 0.4327, "step": 3517, "task_loss": 0.47930845618247986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35263484716415405, "epoch": 2.97, "learning_rate": 2.875860403332931e-05, "loss": 0.4281, "step": 3518, "task_loss": 0.43799078464508057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4956884980201721, "epoch": 2.97, "learning_rate": 2.8752566115203477e-05, "loss": 0.4546, "step": 3519, "task_loss": 0.7812923789024353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2910519540309906, "epoch": 2.97, "learning_rate": 2.8746528197077648e-05, "loss": 0.42, "step": 3520, "task_loss": 0.16700947284698486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.222733736038208, "epoch": 2.98, "learning_rate": 2.8740490278951822e-05, "loss": 0.6555, "step": 3521, "task_loss": 1.59226393699646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4519144892692566, "epoch": 2.98, "learning_rate": 2.8734452360825986e-05, "loss": 0.5243, "step": 3522, "task_loss": 0.23115915060043335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31321656703948975, "epoch": 2.98, "learning_rate": 2.8728414442700156e-05, "loss": 0.4542, "step": 3523, "task_loss": 0.5738106966018677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5124401450157166, "epoch": 2.98, "learning_rate": 2.872237652457433e-05, "loss": 0.4715, "step": 3524, "task_loss": 0.5305476188659668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5176233053207397, "epoch": 2.98, "learning_rate": 2.8716338606448494e-05, "loss": 0.4076, "step": 3525, "task_loss": 0.15946051478385925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6941971778869629, "epoch": 2.98, "learning_rate": 2.8710300688322668e-05, "loss": 0.5732, "step": 3526, "task_loss": 0.5419420599937439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5680044889450073, "epoch": 2.98, "learning_rate": 2.870426277019684e-05, "loss": 0.4936, "step": 3527, "task_loss": 1.200368046760559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32157236337661743, "epoch": 2.98, "learning_rate": 2.869822485207101e-05, "loss": 0.5255, "step": 3528, "task_loss": 0.7837161421775818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1495788097381592, "epoch": 2.98, "learning_rate": 2.8692186933945176e-05, "loss": 0.5503, "step": 3529, "task_loss": 1.0662959814071655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5993354916572571, "epoch": 2.98, "learning_rate": 2.8686149015819347e-05, "loss": 0.6436, "step": 3530, "task_loss": 1.8772743940353394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40318620204925537, "epoch": 2.98, "learning_rate": 2.8680111097693518e-05, "loss": 0.5939, "step": 3531, "task_loss": 0.3085368871688843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3092621862888336, "epoch": 2.99, "learning_rate": 2.8674073179567685e-05, "loss": 0.3925, "step": 3532, "task_loss": 0.31794121861457825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6325441598892212, "epoch": 2.99, "learning_rate": 2.8668035261441855e-05, "loss": 0.5815, "step": 3533, "task_loss": 1.6624577045440674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3288254737854004, "epoch": 2.99, "learning_rate": 2.8661997343316026e-05, "loss": 0.43, "step": 3534, "task_loss": 0.3448793888092041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5193144679069519, "epoch": 2.99, "learning_rate": 2.8655959425190193e-05, "loss": 0.4627, "step": 3535, "task_loss": 0.8711181879043579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4365656077861786, "epoch": 2.99, "learning_rate": 2.8649921507064364e-05, "loss": 0.5685, "step": 3536, "task_loss": 0.5186581611633301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3573707938194275, "epoch": 2.99, "learning_rate": 2.8643883588938538e-05, "loss": 0.4181, "step": 3537, "task_loss": 0.9649747610092163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.281457781791687, "epoch": 2.99, "learning_rate": 2.8637845670812708e-05, "loss": 0.4403, "step": 3538, "task_loss": 0.56329345703125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5294956564903259, "epoch": 2.99, "learning_rate": 2.8631807752686872e-05, "loss": 0.4806, "step": 3539, "task_loss": 1.0712430477142334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45205825567245483, "epoch": 2.99, "learning_rate": 2.8625769834561046e-05, "loss": 0.5266, "step": 3540, "task_loss": 1.4153289794921875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41040658950805664, "epoch": 2.99, "learning_rate": 2.8619731916435216e-05, "loss": 0.3862, "step": 3541, "task_loss": 0.8456797003746033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5531144738197327, "epoch": 2.99, "learning_rate": 2.8613693998309384e-05, "loss": 0.4225, "step": 3542, "task_loss": 0.5082275867462158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.555311918258667, "epoch": 2.99, "learning_rate": 2.8607656080183554e-05, "loss": 0.4137, "step": 3543, "task_loss": 1.2110164165496826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3252636194229126, "epoch": 3.0, "learning_rate": 2.8601618162057725e-05, "loss": 0.6191, "step": 3544, "task_loss": 0.9290558099746704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30460429191589355, "epoch": 3.0, "learning_rate": 2.8595580243931892e-05, "loss": 0.4289, "step": 3545, "task_loss": 0.5148041844367981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0611107349395752, "epoch": 3.0, "learning_rate": 2.8589542325806063e-05, "loss": 0.6806, "step": 3546, "task_loss": 0.9801679849624634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7500762939453125, "epoch": 3.0, "learning_rate": 2.8583504407680233e-05, "loss": 0.5951, "step": 3547, "task_loss": 0.6575257778167725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6670458316802979, "epoch": 3.0, "learning_rate": 2.8577466489554404e-05, "loss": 0.3625, "step": 3548, "task_loss": 0.4299856126308441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6836391091346741, "epoch": 3.0, "learning_rate": 2.857142857142857e-05, "loss": 0.454, "step": 3549, "task_loss": 0.6034637689590454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3533514142036438, "epoch": 3.0, "learning_rate": 2.856539065330274e-05, "loss": 0.9688, "step": 3550, "task_loss": 0.9581204056739807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33847349882125854, "epoch": 3.0, "learning_rate": 2.8559352735176915e-05, "loss": 0.4388, "step": 3551, "task_loss": 0.5296328663825989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4898054003715515, "epoch": 3.0, "learning_rate": 2.855331481705108e-05, "loss": 0.4538, "step": 3552, "task_loss": 0.17184749245643616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5463446378707886, "epoch": 3.0, "learning_rate": 2.8547276898925253e-05, "loss": 0.4525, "step": 3553, "task_loss": 0.7138597369194031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.755555272102356, "epoch": 3.0, "learning_rate": 2.8541238980799424e-05, "loss": 0.5134, "step": 3554, "task_loss": 0.38093385100364685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27296683192253113, "epoch": 3.01, "learning_rate": 2.8535201062673588e-05, "loss": 0.4114, "step": 3555, "task_loss": 0.55452960729599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48459380865097046, "epoch": 3.01, "learning_rate": 2.852916314454776e-05, "loss": 0.4387, "step": 3556, "task_loss": 0.6932774782180786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3475702106952667, "epoch": 3.01, "learning_rate": 2.8523125226421932e-05, "loss": 0.4244, "step": 3557, "task_loss": 0.15120479464530945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30520516633987427, "epoch": 3.01, "learning_rate": 2.8517087308296103e-05, "loss": 0.4541, "step": 3558, "task_loss": 0.5316913723945618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4337761402130127, "epoch": 3.01, "learning_rate": 2.851104939017027e-05, "loss": 0.5146, "step": 3559, "task_loss": 0.6827594637870789 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7733412981033325, "epoch": 3.01, "learning_rate": 2.850501147204444e-05, "loss": 0.5886, "step": 3560, "task_loss": 1.4241622686386108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6680446863174438, "epoch": 3.01, "learning_rate": 2.849897355391861e-05, "loss": 0.6058, "step": 3561, "task_loss": 1.7751154899597168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5004181861877441, "epoch": 3.01, "learning_rate": 2.8492935635792778e-05, "loss": 0.5546, "step": 3562, "task_loss": 0.3993878662586212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6173107624053955, "epoch": 3.01, "learning_rate": 2.848689771766695e-05, "loss": 0.5669, "step": 3563, "task_loss": 0.36234623193740845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5381333827972412, "epoch": 3.01, "learning_rate": 2.848085979954112e-05, "loss": 0.573, "step": 3564, "task_loss": 0.699542760848999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25445032119750977, "epoch": 3.01, "learning_rate": 2.8474821881415286e-05, "loss": 0.4701, "step": 3565, "task_loss": 0.8389710187911987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6874338388442993, "epoch": 3.01, "learning_rate": 2.8468783963289457e-05, "loss": 0.4907, "step": 3566, "task_loss": 0.3249737620353699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5329925417900085, "epoch": 3.02, "learning_rate": 2.846274604516363e-05, "loss": 0.6199, "step": 3567, "task_loss": 0.9511284828186035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6280394792556763, "epoch": 3.02, "learning_rate": 2.84567081270378e-05, "loss": 0.5125, "step": 3568, "task_loss": 1.4617928266525269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4504295587539673, "epoch": 3.02, "learning_rate": 2.8450670208911965e-05, "loss": 0.5466, "step": 3569, "task_loss": 0.7624999284744263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5928565263748169, "epoch": 3.02, "learning_rate": 2.844463229078614e-05, "loss": 0.4324, "step": 3570, "task_loss": 0.9979885220527649 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2984638512134552, "epoch": 3.02, "learning_rate": 2.843859437266031e-05, "loss": 0.4787, "step": 3571, "task_loss": 0.3345983326435089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5467271208763123, "epoch": 3.02, "learning_rate": 2.8432556454534477e-05, "loss": 0.6054, "step": 3572, "task_loss": 0.9508162140846252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5683314204216003, "epoch": 3.02, "learning_rate": 2.8426518536408648e-05, "loss": 0.4127, "step": 3573, "task_loss": 1.2644039392471313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2952745854854584, "epoch": 3.02, "learning_rate": 2.8420480618282818e-05, "loss": 0.4286, "step": 3574, "task_loss": 0.2756368815898895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5673413276672363, "epoch": 3.02, "learning_rate": 2.8414442700156985e-05, "loss": 0.6141, "step": 3575, "task_loss": 1.2518839836120605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6365405321121216, "epoch": 3.02, "learning_rate": 2.8408404782031156e-05, "loss": 0.4843, "step": 3576, "task_loss": 1.0630043745040894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33388882875442505, "epoch": 3.02, "learning_rate": 2.8402366863905327e-05, "loss": 0.4401, "step": 3577, "task_loss": 0.20198211073875427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3510734736919403, "epoch": 3.02, "learning_rate": 2.83963289457795e-05, "loss": 0.4814, "step": 3578, "task_loss": 0.4280450940132141 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3837463855743408, "epoch": 3.03, "learning_rate": 2.8390291027653664e-05, "loss": 0.3938, "step": 3579, "task_loss": 0.630935788154602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32645487785339355, "epoch": 3.03, "learning_rate": 2.8384253109527835e-05, "loss": 0.3543, "step": 3580, "task_loss": 0.368166446685791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6891698837280273, "epoch": 3.03, "learning_rate": 2.837821519140201e-05, "loss": 0.6258, "step": 3581, "task_loss": 1.0404555797576904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3046247363090515, "epoch": 3.03, "learning_rate": 2.8372177273276173e-05, "loss": 0.3869, "step": 3582, "task_loss": 0.50554358959198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7297601699829102, "epoch": 3.03, "learning_rate": 2.8366139355150347e-05, "loss": 0.6043, "step": 3583, "task_loss": 1.3709642887115479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5272302031517029, "epoch": 3.03, "learning_rate": 2.8360101437024517e-05, "loss": 0.461, "step": 3584, "task_loss": 0.7935236096382141 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3003286123275757, "epoch": 3.03, "learning_rate": 2.835406351889868e-05, "loss": 0.5803, "step": 3585, "task_loss": 0.5901904702186584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7041680216789246, "epoch": 3.03, "learning_rate": 2.8348025600772855e-05, "loss": 0.4253, "step": 3586, "task_loss": 1.0076161623001099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45832404494285583, "epoch": 3.03, "learning_rate": 2.8341987682647025e-05, "loss": 0.5201, "step": 3587, "task_loss": 1.4068443775177002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6017296314239502, "epoch": 3.03, "learning_rate": 2.8335949764521196e-05, "loss": 0.6048, "step": 3588, "task_loss": 0.8529587388038635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40517953038215637, "epoch": 3.03, "learning_rate": 2.8329911846395363e-05, "loss": 0.4565, "step": 3589, "task_loss": 0.3251385986804962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6033695936203003, "epoch": 3.03, "learning_rate": 2.8323873928269534e-05, "loss": 0.5075, "step": 3590, "task_loss": 0.561063826084137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46417003870010376, "epoch": 3.04, "learning_rate": 2.8317836010143704e-05, "loss": 0.4484, "step": 3591, "task_loss": 1.1085803508758545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5526866316795349, "epoch": 3.04, "learning_rate": 2.831179809201787e-05, "loss": 0.6007, "step": 3592, "task_loss": 1.0920662879943848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37230247259140015, "epoch": 3.04, "learning_rate": 2.8305760173892042e-05, "loss": 0.3547, "step": 3593, "task_loss": 0.3790980577468872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9909676313400269, "epoch": 3.04, "learning_rate": 2.8299722255766216e-05, "loss": 0.579, "step": 3594, "task_loss": 1.1322625875473022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22979691624641418, "epoch": 3.04, "learning_rate": 2.829368433764038e-05, "loss": 0.346, "step": 3595, "task_loss": 0.23855134844779968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4794676899909973, "epoch": 3.04, "learning_rate": 2.828764641951455e-05, "loss": 0.4032, "step": 3596, "task_loss": 0.344666063785553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7051802277565002, "epoch": 3.04, "learning_rate": 2.8281608501388724e-05, "loss": 0.4609, "step": 3597, "task_loss": 1.6266266107559204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6506420373916626, "epoch": 3.04, "learning_rate": 2.8275570583262895e-05, "loss": 0.5281, "step": 3598, "task_loss": 1.0988396406173706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4237847626209259, "epoch": 3.04, "learning_rate": 2.8269532665137062e-05, "loss": 0.5078, "step": 3599, "task_loss": 0.6086234450340271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4326009750366211, "epoch": 3.04, "learning_rate": 2.8263494747011233e-05, "loss": 0.4878, "step": 3600, "task_loss": 0.2055385559797287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5627312660217285, "epoch": 3.04, "learning_rate": 2.8257456828885403e-05, "loss": 0.5025, "step": 3601, "task_loss": 2.380143880844116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6212216019630432, "epoch": 3.04, "learning_rate": 2.825141891075957e-05, "loss": 0.5817, "step": 3602, "task_loss": 1.3902779817581177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41762039065361023, "epoch": 3.05, "learning_rate": 2.824538099263374e-05, "loss": 0.3821, "step": 3603, "task_loss": 0.6749762296676636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45919179916381836, "epoch": 3.05, "learning_rate": 2.823934307450791e-05, "loss": 0.3895, "step": 3604, "task_loss": 0.4431206285953522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6568092703819275, "epoch": 3.05, "learning_rate": 2.823330515638208e-05, "loss": 0.483, "step": 3605, "task_loss": 0.31661882996559143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2956150770187378, "epoch": 3.05, "learning_rate": 2.822726723825625e-05, "loss": 0.3153, "step": 3606, "task_loss": 0.5368849039077759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6561458110809326, "epoch": 3.05, "learning_rate": 2.822122932013042e-05, "loss": 0.4368, "step": 3607, "task_loss": 0.2726486623287201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48889702558517456, "epoch": 3.05, "learning_rate": 2.8215191402004594e-05, "loss": 0.3976, "step": 3608, "task_loss": 0.3532431125640869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36325594782829285, "epoch": 3.05, "learning_rate": 2.8209153483878758e-05, "loss": 0.3915, "step": 3609, "task_loss": 0.41280871629714966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33242660760879517, "epoch": 3.05, "learning_rate": 2.820311556575293e-05, "loss": 0.4877, "step": 3610, "task_loss": 0.5544769763946533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5429433584213257, "epoch": 3.05, "learning_rate": 2.8197077647627102e-05, "loss": 0.4787, "step": 3611, "task_loss": 0.6222197413444519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4575852155685425, "epoch": 3.05, "learning_rate": 2.8191039729501266e-05, "loss": 0.4213, "step": 3612, "task_loss": 0.23648343980312347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20484265685081482, "epoch": 3.05, "learning_rate": 2.818500181137544e-05, "loss": 0.409, "step": 3613, "task_loss": 0.19951371848583221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6237741708755493, "epoch": 3.05, "learning_rate": 2.817896389324961e-05, "loss": 0.5062, "step": 3614, "task_loss": 0.6917451024055481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.210088849067688, "epoch": 3.06, "learning_rate": 2.8172925975123778e-05, "loss": 0.4693, "step": 3615, "task_loss": 0.553411066532135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6486247777938843, "epoch": 3.06, "learning_rate": 2.8166888056997948e-05, "loss": 0.5587, "step": 3616, "task_loss": 0.2379772961139679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4274231195449829, "epoch": 3.06, "learning_rate": 2.816085013887212e-05, "loss": 0.5049, "step": 3617, "task_loss": 0.9958541393280029 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39084628224372864, "epoch": 3.06, "learning_rate": 2.815481222074629e-05, "loss": 0.4523, "step": 3618, "task_loss": 0.5883374810218811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38636350631713867, "epoch": 3.06, "learning_rate": 2.8148774302620457e-05, "loss": 0.5571, "step": 3619, "task_loss": 0.06682948768138885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21309836208820343, "epoch": 3.06, "learning_rate": 2.8142736384494627e-05, "loss": 0.4645, "step": 3620, "task_loss": 0.11448206007480621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6594511866569519, "epoch": 3.06, "learning_rate": 2.8136698466368798e-05, "loss": 0.5874, "step": 3621, "task_loss": 0.5748461484909058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.376488596200943, "epoch": 3.06, "learning_rate": 2.8130660548242965e-05, "loss": 0.4883, "step": 3622, "task_loss": 0.5237273573875427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29915887117385864, "epoch": 3.06, "learning_rate": 2.8124622630117136e-05, "loss": 0.3813, "step": 3623, "task_loss": 0.5404086709022522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.318030446767807, "epoch": 3.06, "learning_rate": 2.811858471199131e-05, "loss": 0.5499, "step": 3624, "task_loss": 0.569028377532959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5352079272270203, "epoch": 3.06, "learning_rate": 2.8112546793865473e-05, "loss": 0.4882, "step": 3625, "task_loss": 1.0020768642425537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5007918477058411, "epoch": 3.07, "learning_rate": 2.8106508875739644e-05, "loss": 0.4306, "step": 3626, "task_loss": 0.7473089098930359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29142212867736816, "epoch": 3.07, "learning_rate": 2.8100470957613818e-05, "loss": 0.3911, "step": 3627, "task_loss": 0.27181142568588257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6356551051139832, "epoch": 3.07, "learning_rate": 2.809443303948799e-05, "loss": 0.5373, "step": 3628, "task_loss": 0.4069119095802307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3050776422023773, "epoch": 3.07, "learning_rate": 2.8088395121362156e-05, "loss": 0.3771, "step": 3629, "task_loss": 0.4783659875392914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4592183828353882, "epoch": 3.07, "learning_rate": 2.8082357203236326e-05, "loss": 0.3757, "step": 3630, "task_loss": 0.5908127427101135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9561770558357239, "epoch": 3.07, "learning_rate": 2.8076319285110497e-05, "loss": 0.5924, "step": 3631, "task_loss": 1.3677409887313843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5705920457839966, "epoch": 3.07, "learning_rate": 2.8070281366984664e-05, "loss": 0.5787, "step": 3632, "task_loss": 0.49858781695365906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6246144771575928, "epoch": 3.07, "learning_rate": 2.8064243448858834e-05, "loss": 0.5949, "step": 3633, "task_loss": 1.1533740758895874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4577222764492035, "epoch": 3.07, "learning_rate": 2.8058205530733005e-05, "loss": 0.4027, "step": 3634, "task_loss": 0.32916343212127686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23387545347213745, "epoch": 3.07, "learning_rate": 2.8052167612607172e-05, "loss": 0.5222, "step": 3635, "task_loss": 0.37863969802856445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5962748527526855, "epoch": 3.07, "learning_rate": 2.8046129694481343e-05, "loss": 0.5254, "step": 3636, "task_loss": 0.1686752289533615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25656014680862427, "epoch": 3.07, "learning_rate": 2.8040091776355513e-05, "loss": 0.4647, "step": 3637, "task_loss": 0.9202278852462769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5023301839828491, "epoch": 3.08, "learning_rate": 2.8034053858229687e-05, "loss": 0.5045, "step": 3638, "task_loss": 0.5901650786399841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.598271369934082, "epoch": 3.08, "learning_rate": 2.802801594010385e-05, "loss": 0.4439, "step": 3639, "task_loss": 0.22536367177963257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5311582088470459, "epoch": 3.08, "learning_rate": 2.8021978021978025e-05, "loss": 0.5429, "step": 3640, "task_loss": 1.3346364498138428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5746275782585144, "epoch": 3.08, "learning_rate": 2.8015940103852196e-05, "loss": 0.6143, "step": 3641, "task_loss": 0.10542315244674683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48260021209716797, "epoch": 3.08, "learning_rate": 2.800990218572636e-05, "loss": 0.4859, "step": 3642, "task_loss": 0.9695532917976379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3220760226249695, "epoch": 3.08, "learning_rate": 2.8003864267600533e-05, "loss": 0.4761, "step": 3643, "task_loss": 0.34544941782951355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32329991459846497, "epoch": 3.08, "learning_rate": 2.7997826349474704e-05, "loss": 0.3876, "step": 3644, "task_loss": 0.48132896423339844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2614002227783203, "epoch": 3.08, "learning_rate": 2.799178843134887e-05, "loss": 0.3392, "step": 3645, "task_loss": 0.04544193297624588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22407756745815277, "epoch": 3.08, "learning_rate": 2.7985750513223042e-05, "loss": 0.3615, "step": 3646, "task_loss": 0.39186593890190125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4492708444595337, "epoch": 3.08, "learning_rate": 2.7979712595097212e-05, "loss": 0.4968, "step": 3647, "task_loss": 0.584966778755188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26641926169395447, "epoch": 3.08, "learning_rate": 2.7973674676971383e-05, "loss": 0.6101, "step": 3648, "task_loss": 0.8458240628242493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6282322406768799, "epoch": 3.08, "learning_rate": 2.796763675884555e-05, "loss": 0.4563, "step": 3649, "task_loss": 0.24715039134025574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2719741463661194, "epoch": 3.09, "learning_rate": 2.796159884071972e-05, "loss": 0.3821, "step": 3650, "task_loss": 1.2406189441680908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4035729765892029, "epoch": 3.09, "learning_rate": 2.7955560922593895e-05, "loss": 0.5979, "step": 3651, "task_loss": 0.9433731436729431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6591372489929199, "epoch": 3.09, "learning_rate": 2.794952300446806e-05, "loss": 0.4946, "step": 3652, "task_loss": 0.8267763257026672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6503729820251465, "epoch": 3.09, "learning_rate": 2.794348508634223e-05, "loss": 0.4986, "step": 3653, "task_loss": 0.5931035876274109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6868230104446411, "epoch": 3.09, "learning_rate": 2.7937447168216403e-05, "loss": 0.6527, "step": 3654, "task_loss": 0.9304125308990479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3495268225669861, "epoch": 3.09, "learning_rate": 2.7931409250090567e-05, "loss": 0.4891, "step": 3655, "task_loss": 0.6375213861465454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5631970763206482, "epoch": 3.09, "learning_rate": 2.792537133196474e-05, "loss": 0.4469, "step": 3656, "task_loss": 0.6136302947998047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3942711055278778, "epoch": 3.09, "learning_rate": 2.791933341383891e-05, "loss": 0.6714, "step": 3657, "task_loss": 0.5084966421127319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3118196427822113, "epoch": 3.09, "learning_rate": 2.7913295495713082e-05, "loss": 0.5567, "step": 3658, "task_loss": 0.9686987400054932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5054408311843872, "epoch": 3.09, "learning_rate": 2.790725757758725e-05, "loss": 0.3716, "step": 3659, "task_loss": 0.31585702300071716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6493421792984009, "epoch": 3.09, "learning_rate": 2.790121965946142e-05, "loss": 0.3589, "step": 3660, "task_loss": 0.36718112230300903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.621542751789093, "epoch": 3.09, "learning_rate": 2.789518174133559e-05, "loss": 0.5424, "step": 3661, "task_loss": 1.054340124130249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2827891707420349, "epoch": 3.1, "learning_rate": 2.7889143823209757e-05, "loss": 0.4667, "step": 3662, "task_loss": 0.6524767279624939 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5372504591941833, "epoch": 3.1, "learning_rate": 2.7883105905083928e-05, "loss": 0.5108, "step": 3663, "task_loss": 1.2178716659545898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5190407037734985, "epoch": 3.1, "learning_rate": 2.78770679869581e-05, "loss": 0.5419, "step": 3664, "task_loss": 1.3822969198226929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.637588381767273, "epoch": 3.1, "learning_rate": 2.7871030068832266e-05, "loss": 0.4763, "step": 3665, "task_loss": 0.6313797831535339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36066240072250366, "epoch": 3.1, "learning_rate": 2.7864992150706436e-05, "loss": 0.3856, "step": 3666, "task_loss": 0.36148226261138916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3255571126937866, "epoch": 3.1, "learning_rate": 2.785895423258061e-05, "loss": 0.3976, "step": 3667, "task_loss": 0.4383634328842163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21542629599571228, "epoch": 3.1, "learning_rate": 2.785291631445478e-05, "loss": 0.4528, "step": 3668, "task_loss": 0.12668272852897644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44919925928115845, "epoch": 3.1, "learning_rate": 2.7846878396328945e-05, "loss": 0.4941, "step": 3669, "task_loss": 0.271106094121933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31585222482681274, "epoch": 3.1, "learning_rate": 2.784084047820312e-05, "loss": 0.4455, "step": 3670, "task_loss": 0.7598687410354614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33869680762290955, "epoch": 3.1, "learning_rate": 2.783480256007729e-05, "loss": 0.3751, "step": 3671, "task_loss": 0.6365596652030945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47457852959632874, "epoch": 3.1, "learning_rate": 2.7828764641951456e-05, "loss": 0.2948, "step": 3672, "task_loss": 0.4148084819316864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4919039309024811, "epoch": 3.1, "learning_rate": 2.7822726723825627e-05, "loss": 0.5445, "step": 3673, "task_loss": 0.259429007768631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18670231103897095, "epoch": 3.11, "learning_rate": 2.7816688805699797e-05, "loss": 0.3833, "step": 3674, "task_loss": 0.19527184963226318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3661242723464966, "epoch": 3.11, "learning_rate": 2.7810650887573965e-05, "loss": 0.5201, "step": 3675, "task_loss": 0.152776300907135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37765684723854065, "epoch": 3.11, "learning_rate": 2.7804612969448135e-05, "loss": 0.4769, "step": 3676, "task_loss": 0.31206339597702026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5814509987831116, "epoch": 3.11, "learning_rate": 2.7798575051322306e-05, "loss": 0.4623, "step": 3677, "task_loss": 0.16709841787815094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.392178475856781, "epoch": 3.11, "learning_rate": 2.7792537133196476e-05, "loss": 0.4218, "step": 3678, "task_loss": 1.0004663467407227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5476729273796082, "epoch": 3.11, "learning_rate": 2.7786499215070643e-05, "loss": 0.6334, "step": 3679, "task_loss": 0.9133791923522949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6512172222137451, "epoch": 3.11, "learning_rate": 2.7780461296944814e-05, "loss": 0.5119, "step": 3680, "task_loss": 1.0619369745254517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3129637837409973, "epoch": 3.11, "learning_rate": 2.7774423378818988e-05, "loss": 0.375, "step": 3681, "task_loss": 0.23747555911540985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37736237049102783, "epoch": 3.11, "learning_rate": 2.7768385460693152e-05, "loss": 0.5744, "step": 3682, "task_loss": 0.5064758658409119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41763201355934143, "epoch": 3.11, "learning_rate": 2.7762347542567326e-05, "loss": 0.6918, "step": 3683, "task_loss": 0.7087569832801819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33278292417526245, "epoch": 3.11, "learning_rate": 2.7756309624441496e-05, "loss": 0.4068, "step": 3684, "task_loss": 0.2805294096469879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3210510015487671, "epoch": 3.11, "learning_rate": 2.775027170631566e-05, "loss": 0.4709, "step": 3685, "task_loss": 0.540276050567627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2908821105957031, "epoch": 3.12, "learning_rate": 2.7744233788189834e-05, "loss": 0.4269, "step": 3686, "task_loss": 0.6372520923614502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34967511892318726, "epoch": 3.12, "learning_rate": 2.7738195870064005e-05, "loss": 0.4529, "step": 3687, "task_loss": 0.397563099861145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3590922951698303, "epoch": 3.12, "learning_rate": 2.7732157951938175e-05, "loss": 0.4211, "step": 3688, "task_loss": 0.17454321682453156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4748663902282715, "epoch": 3.12, "learning_rate": 2.7726120033812342e-05, "loss": 0.3694, "step": 3689, "task_loss": 0.5434775352478027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20685353875160217, "epoch": 3.12, "learning_rate": 2.7720082115686513e-05, "loss": 0.5269, "step": 3690, "task_loss": 1.1865594387054443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37678900361061096, "epoch": 3.12, "learning_rate": 2.7714044197560684e-05, "loss": 0.3365, "step": 3691, "task_loss": 0.31317418813705444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33249717950820923, "epoch": 3.12, "learning_rate": 2.770800627943485e-05, "loss": 0.2743, "step": 3692, "task_loss": 0.39862722158432007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6467922925949097, "epoch": 3.12, "learning_rate": 2.770196836130902e-05, "loss": 0.4484, "step": 3693, "task_loss": 0.8659442067146301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45025715231895447, "epoch": 3.12, "learning_rate": 2.7695930443183192e-05, "loss": 0.4819, "step": 3694, "task_loss": 0.12390927970409393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8230711221694946, "epoch": 3.12, "learning_rate": 2.768989252505736e-05, "loss": 0.508, "step": 3695, "task_loss": 1.1976484060287476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40372607111930847, "epoch": 3.12, "learning_rate": 2.768385460693153e-05, "loss": 0.4669, "step": 3696, "task_loss": 1.4796639680862427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5966259837150574, "epoch": 3.13, "learning_rate": 2.7677816688805704e-05, "loss": 0.6429, "step": 3697, "task_loss": 1.936277985572815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5284555554389954, "epoch": 3.13, "learning_rate": 2.7671778770679874e-05, "loss": 0.5244, "step": 3698, "task_loss": 1.0220226049423218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30787354707717896, "epoch": 3.13, "learning_rate": 2.7665740852554038e-05, "loss": 0.4983, "step": 3699, "task_loss": 0.2537511885166168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45157623291015625, "epoch": 3.13, "learning_rate": 2.7659702934428212e-05, "loss": 0.5307, "step": 3700, "task_loss": 0.8245981931686401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9000365138053894, "epoch": 3.13, "learning_rate": 2.7653665016302382e-05, "loss": 0.6308, "step": 3701, "task_loss": 0.4727192521095276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8680077791213989, "epoch": 3.13, "learning_rate": 2.764762709817655e-05, "loss": 0.5058, "step": 3702, "task_loss": 0.727418065071106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22678235173225403, "epoch": 3.13, "learning_rate": 2.764158918005072e-05, "loss": 0.5834, "step": 3703, "task_loss": 0.23429925739765167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6659947633743286, "epoch": 3.13, "learning_rate": 2.763555126192489e-05, "loss": 0.5287, "step": 3704, "task_loss": 0.6540741920471191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.507392942905426, "epoch": 3.13, "learning_rate": 2.7629513343799058e-05, "loss": 0.5737, "step": 3705, "task_loss": 1.5640809535980225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6965770721435547, "epoch": 3.13, "learning_rate": 2.762347542567323e-05, "loss": 0.5183, "step": 3706, "task_loss": 0.5658238530158997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44030606746673584, "epoch": 3.13, "learning_rate": 2.76174375075474e-05, "loss": 0.3881, "step": 3707, "task_loss": 0.5730239748954773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25043147802352905, "epoch": 3.13, "learning_rate": 2.7611399589421566e-05, "loss": 0.4959, "step": 3708, "task_loss": 0.06971047818660736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8032805323600769, "epoch": 3.14, "learning_rate": 2.7605361671295737e-05, "loss": 0.644, "step": 3709, "task_loss": 0.6950515508651733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42279842495918274, "epoch": 3.14, "learning_rate": 2.7599323753169907e-05, "loss": 0.4404, "step": 3710, "task_loss": 1.0124675035476685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5459520816802979, "epoch": 3.14, "learning_rate": 2.759328583504408e-05, "loss": 0.4325, "step": 3711, "task_loss": 1.7948073148727417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3870251774787903, "epoch": 3.14, "learning_rate": 2.7587247916918245e-05, "loss": 0.4377, "step": 3712, "task_loss": 0.5671047568321228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4148760735988617, "epoch": 3.14, "learning_rate": 2.758120999879242e-05, "loss": 0.4798, "step": 3713, "task_loss": 0.6959971785545349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45963677763938904, "epoch": 3.14, "learning_rate": 2.757517208066659e-05, "loss": 0.4669, "step": 3714, "task_loss": 0.2200625091791153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2766980230808258, "epoch": 3.14, "learning_rate": 2.7569134162540754e-05, "loss": 0.3921, "step": 3715, "task_loss": 0.15562470257282257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.738599419593811, "epoch": 3.14, "learning_rate": 2.7563096244414927e-05, "loss": 0.4424, "step": 3716, "task_loss": 0.23788122832775116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4896979033946991, "epoch": 3.14, "learning_rate": 2.7557058326289098e-05, "loss": 0.5829, "step": 3717, "task_loss": 0.9387816190719604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5465485453605652, "epoch": 3.14, "learning_rate": 2.7551020408163265e-05, "loss": 0.4874, "step": 3718, "task_loss": 0.9598830938339233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20556646585464478, "epoch": 3.14, "learning_rate": 2.7544982490037436e-05, "loss": 0.4557, "step": 3719, "task_loss": 0.9538556337356567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3058308959007263, "epoch": 3.14, "learning_rate": 2.7538944571911606e-05, "loss": 0.3891, "step": 3720, "task_loss": 0.38971152901649475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5063776969909668, "epoch": 3.15, "learning_rate": 2.7532906653785777e-05, "loss": 0.4321, "step": 3721, "task_loss": 1.1873595714569092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3801400065422058, "epoch": 3.15, "learning_rate": 2.7526868735659944e-05, "loss": 0.4886, "step": 3722, "task_loss": 0.8118539452552795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44611361622810364, "epoch": 3.15, "learning_rate": 2.7520830817534115e-05, "loss": 0.4822, "step": 3723, "task_loss": 0.8867009282112122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5441555976867676, "epoch": 3.15, "learning_rate": 2.751479289940829e-05, "loss": 0.5111, "step": 3724, "task_loss": 0.10371962934732437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2484896034002304, "epoch": 3.15, "learning_rate": 2.7508754981282452e-05, "loss": 0.4787, "step": 3725, "task_loss": 0.21225924789905548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5826791524887085, "epoch": 3.15, "learning_rate": 2.7502717063156623e-05, "loss": 0.4439, "step": 3726, "task_loss": 1.0200649499893188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2507043480873108, "epoch": 3.15, "learning_rate": 2.7496679145030797e-05, "loss": 0.3504, "step": 3727, "task_loss": 0.4424891173839569 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30742576718330383, "epoch": 3.15, "learning_rate": 2.749064122690496e-05, "loss": 0.5144, "step": 3728, "task_loss": 0.4725780785083771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4994845390319824, "epoch": 3.15, "learning_rate": 2.7484603308779135e-05, "loss": 0.5162, "step": 3729, "task_loss": 0.4429226815700531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44574034214019775, "epoch": 3.15, "learning_rate": 2.7478565390653305e-05, "loss": 0.4045, "step": 3730, "task_loss": 0.6407784819602966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.1141254901885986, "epoch": 3.15, "learning_rate": 2.7472527472527476e-05, "loss": 0.5434, "step": 3731, "task_loss": 0.8655601739883423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3691915273666382, "epoch": 3.15, "learning_rate": 2.7466489554401643e-05, "loss": 0.438, "step": 3732, "task_loss": 0.6814383268356323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5573469996452332, "epoch": 3.16, "learning_rate": 2.7460451636275814e-05, "loss": 0.4386, "step": 3733, "task_loss": 0.5748635530471802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.338331013917923, "epoch": 3.16, "learning_rate": 2.7454413718149984e-05, "loss": 0.455, "step": 3734, "task_loss": 0.8077346086502075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38117092847824097, "epoch": 3.16, "learning_rate": 2.744837580002415e-05, "loss": 0.4504, "step": 3735, "task_loss": 0.8835389614105225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40458667278289795, "epoch": 3.16, "learning_rate": 2.7442337881898322e-05, "loss": 0.4667, "step": 3736, "task_loss": 0.6983146071434021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5375289916992188, "epoch": 3.16, "learning_rate": 2.7436299963772493e-05, "loss": 0.4564, "step": 3737, "task_loss": 0.8066797256469727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5554118752479553, "epoch": 3.16, "learning_rate": 2.743026204564666e-05, "loss": 0.5331, "step": 3738, "task_loss": 1.4071393013000488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20875763893127441, "epoch": 3.16, "learning_rate": 2.742422412752083e-05, "loss": 0.387, "step": 3739, "task_loss": 0.11837195605039597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4855177402496338, "epoch": 3.16, "learning_rate": 2.7418186209395004e-05, "loss": 0.5143, "step": 3740, "task_loss": 1.2160515785217285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4730674624443054, "epoch": 3.16, "learning_rate": 2.7412148291269175e-05, "loss": 0.4243, "step": 3741, "task_loss": 0.6971266865730286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4028478264808655, "epoch": 3.16, "learning_rate": 2.740611037314334e-05, "loss": 0.5777, "step": 3742, "task_loss": 0.9248182773590088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37637588381767273, "epoch": 3.16, "learning_rate": 2.7400072455017513e-05, "loss": 0.5109, "step": 3743, "task_loss": 0.9584378004074097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3775970935821533, "epoch": 3.16, "learning_rate": 2.7394034536891683e-05, "loss": 0.477, "step": 3744, "task_loss": 0.8791408538818359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.714202880859375, "epoch": 3.17, "learning_rate": 2.738799661876585e-05, "loss": 0.519, "step": 3745, "task_loss": 0.5660610198974609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.333614706993103, "epoch": 3.17, "learning_rate": 2.738195870064002e-05, "loss": 0.4682, "step": 3746, "task_loss": 0.7252306938171387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5899533033370972, "epoch": 3.17, "learning_rate": 2.737592078251419e-05, "loss": 0.4334, "step": 3747, "task_loss": 0.354643851518631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43606680631637573, "epoch": 3.17, "learning_rate": 2.736988286438836e-05, "loss": 0.4694, "step": 3748, "task_loss": 0.9910557270050049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5919773578643799, "epoch": 3.17, "learning_rate": 2.736384494626253e-05, "loss": 0.5916, "step": 3749, "task_loss": 0.3641583025455475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32916560769081116, "epoch": 3.17, "learning_rate": 2.73578070281367e-05, "loss": 0.3928, "step": 3750, "task_loss": 0.2942343056201935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24297593533992767, "epoch": 3.17, "learning_rate": 2.735176911001087e-05, "loss": 0.4142, "step": 3751, "task_loss": 0.35717228055000305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21526533365249634, "epoch": 3.17, "learning_rate": 2.7345731191885038e-05, "loss": 0.4232, "step": 3752, "task_loss": 0.41626495122909546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4241836667060852, "epoch": 3.17, "learning_rate": 2.7339693273759208e-05, "loss": 0.5549, "step": 3753, "task_loss": 0.5072826743125916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30510959029197693, "epoch": 3.17, "learning_rate": 2.7333655355633382e-05, "loss": 0.5329, "step": 3754, "task_loss": 0.9714002013206482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2840180993080139, "epoch": 3.17, "learning_rate": 2.7327617437507546e-05, "loss": 0.4045, "step": 3755, "task_loss": 0.49276480078697205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38009005784988403, "epoch": 3.17, "learning_rate": 2.7321579519381716e-05, "loss": 0.4147, "step": 3756, "task_loss": 0.07532393932342529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28854674100875854, "epoch": 3.18, "learning_rate": 2.731554160125589e-05, "loss": 0.4888, "step": 3757, "task_loss": 0.6859286427497864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3869174122810364, "epoch": 3.18, "learning_rate": 2.7309503683130054e-05, "loss": 0.4742, "step": 3758, "task_loss": 0.7985444068908691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45360690355300903, "epoch": 3.18, "learning_rate": 2.7303465765004228e-05, "loss": 0.5361, "step": 3759, "task_loss": 1.1029295921325684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6545538902282715, "epoch": 3.18, "learning_rate": 2.72974278468784e-05, "loss": 0.5627, "step": 3760, "task_loss": 0.49392327666282654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5861141085624695, "epoch": 3.18, "learning_rate": 2.729138992875257e-05, "loss": 0.5693, "step": 3761, "task_loss": 0.4167341887950897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38020139932632446, "epoch": 3.18, "learning_rate": 2.7285352010626736e-05, "loss": 0.4231, "step": 3762, "task_loss": 0.5080010294914246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.491394579410553, "epoch": 3.18, "learning_rate": 2.7279314092500907e-05, "loss": 0.4521, "step": 3763, "task_loss": 0.3156215250492096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31411808729171753, "epoch": 3.18, "learning_rate": 2.7273276174375078e-05, "loss": 0.4016, "step": 3764, "task_loss": 1.0664377212524414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5820175409317017, "epoch": 3.18, "learning_rate": 2.7267238256249245e-05, "loss": 0.5676, "step": 3765, "task_loss": 0.41054269671440125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4979381561279297, "epoch": 3.18, "learning_rate": 2.7261200338123415e-05, "loss": 0.4004, "step": 3766, "task_loss": 0.7618645429611206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4897502660751343, "epoch": 3.18, "learning_rate": 2.7255162419997586e-05, "loss": 0.4194, "step": 3767, "task_loss": 0.7769930958747864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5898454189300537, "epoch": 3.19, "learning_rate": 2.7249124501871753e-05, "loss": 0.3941, "step": 3768, "task_loss": 1.044713020324707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4493062496185303, "epoch": 3.19, "learning_rate": 2.7243086583745924e-05, "loss": 0.5024, "step": 3769, "task_loss": 0.6794841289520264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4119808077812195, "epoch": 3.19, "learning_rate": 2.7237048665620098e-05, "loss": 0.4949, "step": 3770, "task_loss": 0.46491485834121704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5707474946975708, "epoch": 3.19, "learning_rate": 2.7231010747494268e-05, "loss": 0.603, "step": 3771, "task_loss": 1.2682360410690308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2518356144428253, "epoch": 3.19, "learning_rate": 2.7224972829368432e-05, "loss": 0.3823, "step": 3772, "task_loss": 0.5874207019805908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40799447894096375, "epoch": 3.19, "learning_rate": 2.7218934911242606e-05, "loss": 0.6104, "step": 3773, "task_loss": 1.5342707633972168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40552014112472534, "epoch": 3.19, "learning_rate": 2.7212896993116777e-05, "loss": 0.4298, "step": 3774, "task_loss": 0.921972393989563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6122440695762634, "epoch": 3.19, "learning_rate": 2.7206859074990944e-05, "loss": 0.5405, "step": 3775, "task_loss": 1.1269055604934692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3796578347682953, "epoch": 3.19, "learning_rate": 2.7200821156865114e-05, "loss": 0.5684, "step": 3776, "task_loss": 0.4733431041240692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6131625771522522, "epoch": 3.19, "learning_rate": 2.7194783238739285e-05, "loss": 0.5599, "step": 3777, "task_loss": 0.8163983821868896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42517492175102234, "epoch": 3.19, "learning_rate": 2.7188745320613452e-05, "loss": 0.472, "step": 3778, "task_loss": 0.328088641166687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48396608233451843, "epoch": 3.19, "learning_rate": 2.7182707402487623e-05, "loss": 0.5105, "step": 3779, "task_loss": 0.7976745963096619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4718230962753296, "epoch": 3.2, "learning_rate": 2.7176669484361793e-05, "loss": 0.541, "step": 3780, "task_loss": 1.1464358568191528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45221805572509766, "epoch": 3.2, "learning_rate": 2.7170631566235967e-05, "loss": 0.4009, "step": 3781, "task_loss": 0.39147815108299255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4125732183456421, "epoch": 3.2, "learning_rate": 2.716459364811013e-05, "loss": 0.3852, "step": 3782, "task_loss": 1.4714211225509644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5703153610229492, "epoch": 3.2, "learning_rate": 2.71585557299843e-05, "loss": 0.502, "step": 3783, "task_loss": 0.6144688725471497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3628414273262024, "epoch": 3.2, "learning_rate": 2.7152517811858476e-05, "loss": 0.5009, "step": 3784, "task_loss": 0.5183252096176147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6770614385604858, "epoch": 3.2, "learning_rate": 2.714647989373264e-05, "loss": 0.4805, "step": 3785, "task_loss": 0.7490726709365845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4068323075771332, "epoch": 3.2, "learning_rate": 2.7140441975606813e-05, "loss": 0.4481, "step": 3786, "task_loss": 0.2946569621562958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5132280588150024, "epoch": 3.2, "learning_rate": 2.7134404057480984e-05, "loss": 0.3899, "step": 3787, "task_loss": 0.8345959186553955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3237858712673187, "epoch": 3.2, "learning_rate": 2.7128366139355148e-05, "loss": 0.5174, "step": 3788, "task_loss": 0.7486493587493896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3196137845516205, "epoch": 3.2, "learning_rate": 2.712232822122932e-05, "loss": 0.4429, "step": 3789, "task_loss": 0.542823851108551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5428749918937683, "epoch": 3.2, "learning_rate": 2.7116290303103492e-05, "loss": 0.5271, "step": 3790, "task_loss": 0.4293018579483032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35533761978149414, "epoch": 3.2, "learning_rate": 2.7110252384977663e-05, "loss": 0.4064, "step": 3791, "task_loss": 0.5869625210762024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39605167508125305, "epoch": 3.21, "learning_rate": 2.710421446685183e-05, "loss": 0.4083, "step": 3792, "task_loss": 0.5537815690040588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5658808946609497, "epoch": 3.21, "learning_rate": 2.7098176548726e-05, "loss": 0.5733, "step": 3793, "task_loss": 0.8816405534744263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3438453674316406, "epoch": 3.21, "learning_rate": 2.709213863060017e-05, "loss": 0.4143, "step": 3794, "task_loss": 0.31732484698295593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4008070230484009, "epoch": 3.21, "learning_rate": 2.7086100712474338e-05, "loss": 0.4573, "step": 3795, "task_loss": 1.3543620109558105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25323665142059326, "epoch": 3.21, "learning_rate": 2.708006279434851e-05, "loss": 0.3714, "step": 3796, "task_loss": 0.8407528400421143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36445048451423645, "epoch": 3.21, "learning_rate": 2.7074024876222683e-05, "loss": 0.4028, "step": 3797, "task_loss": 0.47418949007987976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49445000290870667, "epoch": 3.21, "learning_rate": 2.7067986958096847e-05, "loss": 0.4826, "step": 3798, "task_loss": 0.38024118542671204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6112021207809448, "epoch": 3.21, "learning_rate": 2.7061949039971017e-05, "loss": 0.5987, "step": 3799, "task_loss": 0.40103036165237427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6019525527954102, "epoch": 3.21, "learning_rate": 2.705591112184519e-05, "loss": 0.5076, "step": 3800, "task_loss": 0.47336408495903015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4652875065803528, "epoch": 3.21, "learning_rate": 2.704987320371936e-05, "loss": 0.5205, "step": 3801, "task_loss": 0.45307591557502747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4914754629135132, "epoch": 3.21, "learning_rate": 2.704383528559353e-05, "loss": 0.4339, "step": 3802, "task_loss": 0.7431097030639648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5153275728225708, "epoch": 3.21, "learning_rate": 2.70377973674677e-05, "loss": 0.4591, "step": 3803, "task_loss": 0.6506975889205933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4581150412559509, "epoch": 3.22, "learning_rate": 2.703175944934187e-05, "loss": 0.5647, "step": 3804, "task_loss": 1.1504069566726685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.67653489112854, "epoch": 3.22, "learning_rate": 2.7025721531216037e-05, "loss": 0.6294, "step": 3805, "task_loss": 0.4851398468017578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4237484335899353, "epoch": 3.22, "learning_rate": 2.7019683613090208e-05, "loss": 0.408, "step": 3806, "task_loss": 0.3986663222312927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29001909494400024, "epoch": 3.22, "learning_rate": 2.701364569496438e-05, "loss": 0.429, "step": 3807, "task_loss": 0.8679349422454834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3247288465499878, "epoch": 3.22, "learning_rate": 2.7007607776838545e-05, "loss": 0.4084, "step": 3808, "task_loss": 0.5440003275871277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6273398995399475, "epoch": 3.22, "learning_rate": 2.7001569858712716e-05, "loss": 0.6808, "step": 3809, "task_loss": 0.5788863301277161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28838780522346497, "epoch": 3.22, "learning_rate": 2.6995531940586887e-05, "loss": 0.5086, "step": 3810, "task_loss": 0.8565546870231628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4979211688041687, "epoch": 3.22, "learning_rate": 2.698949402246106e-05, "loss": 0.4762, "step": 3811, "task_loss": 0.9625173211097717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2501160800457001, "epoch": 3.22, "learning_rate": 2.6983456104335224e-05, "loss": 0.3566, "step": 3812, "task_loss": 0.30630674958229065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39313554763793945, "epoch": 3.22, "learning_rate": 2.6977418186209395e-05, "loss": 0.4232, "step": 3813, "task_loss": 0.2287854552268982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5327298045158386, "epoch": 3.22, "learning_rate": 2.697138026808357e-05, "loss": 0.4079, "step": 3814, "task_loss": 0.8478198647499084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41623547673225403, "epoch": 3.22, "learning_rate": 2.6965342349957733e-05, "loss": 0.4801, "step": 3815, "task_loss": 1.6407214403152466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5664340257644653, "epoch": 3.23, "learning_rate": 2.6959304431831907e-05, "loss": 0.6015, "step": 3816, "task_loss": 1.565561294555664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4402120113372803, "epoch": 3.23, "learning_rate": 2.6953266513706077e-05, "loss": 0.4225, "step": 3817, "task_loss": 0.8757083415985107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3454766869544983, "epoch": 3.23, "learning_rate": 2.6947228595580244e-05, "loss": 0.4501, "step": 3818, "task_loss": 0.7579325437545776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5359245538711548, "epoch": 3.23, "learning_rate": 2.6941190677454415e-05, "loss": 0.4633, "step": 3819, "task_loss": 0.6636700630187988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4578399658203125, "epoch": 3.23, "learning_rate": 2.6935152759328586e-05, "loss": 0.5001, "step": 3820, "task_loss": 1.1944279670715332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4239676594734192, "epoch": 3.23, "learning_rate": 2.6929114841202756e-05, "loss": 0.5077, "step": 3821, "task_loss": 0.6618179082870483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6649296283721924, "epoch": 3.23, "learning_rate": 2.6923076923076923e-05, "loss": 0.6076, "step": 3822, "task_loss": 1.778846263885498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3073103725910187, "epoch": 3.23, "learning_rate": 2.6917039004951094e-05, "loss": 0.3501, "step": 3823, "task_loss": 0.49037855863571167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22118458151817322, "epoch": 3.23, "learning_rate": 2.6911001086825264e-05, "loss": 0.52, "step": 3824, "task_loss": 0.5346813201904297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31897056102752686, "epoch": 3.23, "learning_rate": 2.690496316869943e-05, "loss": 0.4522, "step": 3825, "task_loss": 0.2855951488018036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.357305109500885, "epoch": 3.23, "learning_rate": 2.6898925250573602e-05, "loss": 0.4098, "step": 3826, "task_loss": 0.3003218472003937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40114837884902954, "epoch": 3.23, "learning_rate": 2.6892887332447776e-05, "loss": 0.409, "step": 3827, "task_loss": 0.32115429639816284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4808717668056488, "epoch": 3.24, "learning_rate": 2.688684941432194e-05, "loss": 0.5155, "step": 3828, "task_loss": 0.6875206232070923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.345496267080307, "epoch": 3.24, "learning_rate": 2.688081149619611e-05, "loss": 0.4076, "step": 3829, "task_loss": 0.4778772294521332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38118451833724976, "epoch": 3.24, "learning_rate": 2.6874773578070285e-05, "loss": 0.438, "step": 3830, "task_loss": 0.4496663808822632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.347861111164093, "epoch": 3.24, "learning_rate": 2.6868735659944455e-05, "loss": 0.3312, "step": 3831, "task_loss": 0.7432055473327637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36968931555747986, "epoch": 3.24, "learning_rate": 2.6862697741818622e-05, "loss": 0.4154, "step": 3832, "task_loss": 1.435799241065979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45677560567855835, "epoch": 3.24, "learning_rate": 2.6856659823692793e-05, "loss": 0.4434, "step": 3833, "task_loss": 0.41208332777023315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2436949759721756, "epoch": 3.24, "learning_rate": 2.6850621905566963e-05, "loss": 0.4, "step": 3834, "task_loss": 0.7615671157836914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4145451784133911, "epoch": 3.24, "learning_rate": 2.684458398744113e-05, "loss": 0.4543, "step": 3835, "task_loss": 1.2085413932800293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47663623094558716, "epoch": 3.24, "learning_rate": 2.68385460693153e-05, "loss": 0.4517, "step": 3836, "task_loss": 1.24737548828125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37434259057044983, "epoch": 3.24, "learning_rate": 2.6832508151189472e-05, "loss": 0.4088, "step": 3837, "task_loss": 0.35024309158325195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.354168176651001, "epoch": 3.24, "learning_rate": 2.682647023306364e-05, "loss": 0.4743, "step": 3838, "task_loss": 0.3673134744167328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24680231511592865, "epoch": 3.24, "learning_rate": 2.682043231493781e-05, "loss": 0.5686, "step": 3839, "task_loss": 0.1995398849248886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.372487336397171, "epoch": 3.25, "learning_rate": 2.681439439681198e-05, "loss": 0.3815, "step": 3840, "task_loss": 0.4938301742076874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4129903018474579, "epoch": 3.25, "learning_rate": 2.6808356478686154e-05, "loss": 0.3678, "step": 3841, "task_loss": 0.9122922420501709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4061970114707947, "epoch": 3.25, "learning_rate": 2.6802318560560318e-05, "loss": 0.368, "step": 3842, "task_loss": 0.4682944416999817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.190869927406311, "epoch": 3.25, "learning_rate": 2.6796280642434492e-05, "loss": 0.6416, "step": 3843, "task_loss": 1.0394885540008545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2691495418548584, "epoch": 3.25, "learning_rate": 2.6790242724308662e-05, "loss": 0.4088, "step": 3844, "task_loss": 0.5790249109268188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4423500597476959, "epoch": 3.25, "learning_rate": 2.6784204806182826e-05, "loss": 0.4532, "step": 3845, "task_loss": 0.3425118327140808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3186015784740448, "epoch": 3.25, "learning_rate": 2.6778166888057e-05, "loss": 0.3059, "step": 3846, "task_loss": 0.277888685464859 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4284451901912689, "epoch": 3.25, "learning_rate": 2.677212896993117e-05, "loss": 0.652, "step": 3847, "task_loss": 0.6483716368675232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30726659297943115, "epoch": 3.25, "learning_rate": 2.6766091051805338e-05, "loss": 0.4729, "step": 3848, "task_loss": 1.0524650812149048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5764768123626709, "epoch": 3.25, "learning_rate": 2.676005313367951e-05, "loss": 0.6217, "step": 3849, "task_loss": 1.6158502101898193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28240132331848145, "epoch": 3.25, "learning_rate": 2.675401521555368e-05, "loss": 0.3322, "step": 3850, "task_loss": 0.03975025936961174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6292554140090942, "epoch": 3.26, "learning_rate": 2.674797729742785e-05, "loss": 0.4032, "step": 3851, "task_loss": 0.4748497009277344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2582562267780304, "epoch": 3.26, "learning_rate": 2.6741939379302017e-05, "loss": 0.5102, "step": 3852, "task_loss": 0.7279181480407715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3350021243095398, "epoch": 3.26, "learning_rate": 2.6735901461176187e-05, "loss": 0.336, "step": 3853, "task_loss": 0.2806888520717621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33100566267967224, "epoch": 3.26, "learning_rate": 2.672986354305036e-05, "loss": 0.4406, "step": 3854, "task_loss": 0.7229077219963074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4337646961212158, "epoch": 3.26, "learning_rate": 2.6723825624924525e-05, "loss": 0.4537, "step": 3855, "task_loss": 0.6629780530929565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2619765102863312, "epoch": 3.26, "learning_rate": 2.6717787706798696e-05, "loss": 0.3429, "step": 3856, "task_loss": 1.5183417797088623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5419613718986511, "epoch": 3.26, "learning_rate": 2.671174978867287e-05, "loss": 0.3802, "step": 3857, "task_loss": 0.29992806911468506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6280199289321899, "epoch": 3.26, "learning_rate": 2.6705711870547033e-05, "loss": 0.5287, "step": 3858, "task_loss": 1.1595789194107056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3569902777671814, "epoch": 3.26, "learning_rate": 2.6699673952421207e-05, "loss": 0.3865, "step": 3859, "task_loss": 0.2400752305984497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40060263872146606, "epoch": 3.26, "learning_rate": 2.6693636034295378e-05, "loss": 0.4755, "step": 3860, "task_loss": 0.5237710475921631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7523991465568542, "epoch": 3.26, "learning_rate": 2.668759811616955e-05, "loss": 0.4922, "step": 3861, "task_loss": 1.8051953315734863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2729555070400238, "epoch": 3.26, "learning_rate": 2.6681560198043716e-05, "loss": 0.4909, "step": 3862, "task_loss": 0.2702052593231201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26777124404907227, "epoch": 3.27, "learning_rate": 2.6675522279917886e-05, "loss": 0.458, "step": 3863, "task_loss": 0.2686586081981659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3385579586029053, "epoch": 3.27, "learning_rate": 2.6669484361792057e-05, "loss": 0.3781, "step": 3864, "task_loss": 0.49498671293258667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3276691436767578, "epoch": 3.27, "learning_rate": 2.6663446443666224e-05, "loss": 0.3613, "step": 3865, "task_loss": 0.43839287757873535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47337931394577026, "epoch": 3.27, "learning_rate": 2.6657408525540395e-05, "loss": 0.4609, "step": 3866, "task_loss": 0.6574046611785889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3899953365325928, "epoch": 3.27, "learning_rate": 2.6651370607414565e-05, "loss": 0.5545, "step": 3867, "task_loss": 0.5102879405021667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2467774748802185, "epoch": 3.27, "learning_rate": 2.6645332689288732e-05, "loss": 0.366, "step": 3868, "task_loss": 0.16469542682170868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8610694408416748, "epoch": 3.27, "learning_rate": 2.6639294771162903e-05, "loss": 0.6143, "step": 3869, "task_loss": 1.0843948125839233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4005817770957947, "epoch": 3.27, "learning_rate": 2.6633256853037077e-05, "loss": 0.6313, "step": 3870, "task_loss": 0.8859631419181824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34705132246017456, "epoch": 3.27, "learning_rate": 2.6627218934911247e-05, "loss": 0.4313, "step": 3871, "task_loss": 1.4726577997207642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5918349027633667, "epoch": 3.27, "learning_rate": 2.662118101678541e-05, "loss": 0.6443, "step": 3872, "task_loss": 0.7075425982475281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6282655000686646, "epoch": 3.27, "learning_rate": 2.6615143098659585e-05, "loss": 0.4676, "step": 3873, "task_loss": 0.7163531184196472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24590426683425903, "epoch": 3.27, "learning_rate": 2.6609105180533756e-05, "loss": 0.3017, "step": 3874, "task_loss": 0.23999513685703278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30201759934425354, "epoch": 3.28, "learning_rate": 2.6603067262407923e-05, "loss": 0.573, "step": 3875, "task_loss": 0.7882739305496216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4360301196575165, "epoch": 3.28, "learning_rate": 2.6597029344282094e-05, "loss": 0.4593, "step": 3876, "task_loss": 0.6721305847167969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3602806329727173, "epoch": 3.28, "learning_rate": 2.6590991426156264e-05, "loss": 0.5997, "step": 3877, "task_loss": 0.3244798481464386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29144486784935, "epoch": 3.28, "learning_rate": 2.658495350803043e-05, "loss": 0.408, "step": 3878, "task_loss": 1.0748825073242188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5761377215385437, "epoch": 3.28, "learning_rate": 2.6578915589904602e-05, "loss": 0.5067, "step": 3879, "task_loss": 0.27506694197654724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43032532930374146, "epoch": 3.28, "learning_rate": 2.6572877671778772e-05, "loss": 0.5179, "step": 3880, "task_loss": 1.8519164323806763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22886839509010315, "epoch": 3.28, "learning_rate": 2.656683975365294e-05, "loss": 0.3698, "step": 3881, "task_loss": 0.32505083084106445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6251912117004395, "epoch": 3.28, "learning_rate": 2.656080183552711e-05, "loss": 0.5217, "step": 3882, "task_loss": 0.5983681678771973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4213094115257263, "epoch": 3.28, "learning_rate": 2.655476391740128e-05, "loss": 0.4221, "step": 3883, "task_loss": 0.7905737161636353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4257015585899353, "epoch": 3.28, "learning_rate": 2.6548725999275455e-05, "loss": 0.4522, "step": 3884, "task_loss": 1.2116531133651733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2563192844390869, "epoch": 3.28, "learning_rate": 2.654268808114962e-05, "loss": 0.4299, "step": 3885, "task_loss": 0.03786962479352951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7463604211807251, "epoch": 3.28, "learning_rate": 2.653665016302379e-05, "loss": 0.5599, "step": 3886, "task_loss": 0.6209054589271545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4513862729072571, "epoch": 3.29, "learning_rate": 2.6530612244897963e-05, "loss": 0.4596, "step": 3887, "task_loss": 0.6460260152816772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8139039278030396, "epoch": 3.29, "learning_rate": 2.6524574326772127e-05, "loss": 0.5425, "step": 3888, "task_loss": 1.0061540603637695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27647221088409424, "epoch": 3.29, "learning_rate": 2.65185364086463e-05, "loss": 0.4866, "step": 3889, "task_loss": 1.1651498079299927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3041834831237793, "epoch": 3.29, "learning_rate": 2.651249849052047e-05, "loss": 0.4267, "step": 3890, "task_loss": 0.7122364044189453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3995535671710968, "epoch": 3.29, "learning_rate": 2.650646057239464e-05, "loss": 0.4217, "step": 3891, "task_loss": 0.17358238995075226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2713437080383301, "epoch": 3.29, "learning_rate": 2.650042265426881e-05, "loss": 0.3836, "step": 3892, "task_loss": 0.49749836325645447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4826926589012146, "epoch": 3.29, "learning_rate": 2.649438473614298e-05, "loss": 0.387, "step": 3893, "task_loss": 0.9294368028640747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3614562749862671, "epoch": 3.29, "learning_rate": 2.648834681801715e-05, "loss": 0.4223, "step": 3894, "task_loss": 0.27526578307151794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3350338339805603, "epoch": 3.29, "learning_rate": 2.6482308899891317e-05, "loss": 0.4191, "step": 3895, "task_loss": 0.4981161952018738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22239771485328674, "epoch": 3.29, "learning_rate": 2.6476270981765488e-05, "loss": 0.3978, "step": 3896, "task_loss": 0.7405861616134644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6606442332267761, "epoch": 3.29, "learning_rate": 2.647023306363966e-05, "loss": 0.5621, "step": 3897, "task_loss": 1.5681356191635132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3203236758708954, "epoch": 3.29, "learning_rate": 2.6464195145513826e-05, "loss": 0.4922, "step": 3898, "task_loss": 1.0353624820709229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35410240292549133, "epoch": 3.3, "learning_rate": 2.6458157227387996e-05, "loss": 0.4372, "step": 3899, "task_loss": 1.347322702407837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5725029706954956, "epoch": 3.3, "learning_rate": 2.645211930926217e-05, "loss": 0.4005, "step": 3900, "task_loss": 1.6407620906829834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23971456289291382, "epoch": 3.3, "learning_rate": 2.6446081391136334e-05, "loss": 0.3719, "step": 3901, "task_loss": 0.4884962737560272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40868496894836426, "epoch": 3.3, "learning_rate": 2.6440043473010505e-05, "loss": 0.4589, "step": 3902, "task_loss": 1.399366021156311 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28902000188827515, "epoch": 3.3, "learning_rate": 2.643400555488468e-05, "loss": 0.4071, "step": 3903, "task_loss": 1.2724616527557373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5272896885871887, "epoch": 3.3, "learning_rate": 2.642796763675885e-05, "loss": 0.4675, "step": 3904, "task_loss": 1.0054192543029785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3024187684059143, "epoch": 3.3, "learning_rate": 2.6421929718633016e-05, "loss": 0.4644, "step": 3905, "task_loss": 0.6374460458755493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7154397964477539, "epoch": 3.3, "learning_rate": 2.6415891800507187e-05, "loss": 0.467, "step": 3906, "task_loss": 0.6932139992713928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.538710355758667, "epoch": 3.3, "learning_rate": 2.6409853882381357e-05, "loss": 0.5191, "step": 3907, "task_loss": 1.026183009147644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3393324911594391, "epoch": 3.3, "learning_rate": 2.6403815964255525e-05, "loss": 0.4349, "step": 3908, "task_loss": 0.49063408374786377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35550034046173096, "epoch": 3.3, "learning_rate": 2.6397778046129695e-05, "loss": 0.4914, "step": 3909, "task_loss": 1.211669921875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5305246114730835, "epoch": 3.3, "learning_rate": 2.6391740128003866e-05, "loss": 0.5513, "step": 3910, "task_loss": 0.533267617225647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27329254150390625, "epoch": 3.31, "learning_rate": 2.6385702209878033e-05, "loss": 0.4564, "step": 3911, "task_loss": 0.10061242431402206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34398797154426575, "epoch": 3.31, "learning_rate": 2.6379664291752204e-05, "loss": 0.5167, "step": 3912, "task_loss": 0.5979923009872437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4134410321712494, "epoch": 3.31, "learning_rate": 2.6373626373626374e-05, "loss": 0.4573, "step": 3913, "task_loss": 0.6319769620895386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21490710973739624, "epoch": 3.31, "learning_rate": 2.6367588455500548e-05, "loss": 0.3959, "step": 3914, "task_loss": 0.13120034337043762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45233333110809326, "epoch": 3.31, "learning_rate": 2.6361550537374712e-05, "loss": 0.4044, "step": 3915, "task_loss": 0.6892135739326477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8334289789199829, "epoch": 3.31, "learning_rate": 2.6355512619248886e-05, "loss": 0.481, "step": 3916, "task_loss": 1.0999146699905396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43027234077453613, "epoch": 3.31, "learning_rate": 2.6349474701123056e-05, "loss": 0.5899, "step": 3917, "task_loss": 0.28492867946624756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3703644871711731, "epoch": 3.31, "learning_rate": 2.634343678299722e-05, "loss": 0.4369, "step": 3918, "task_loss": 0.4361274242401123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3093618154525757, "epoch": 3.31, "learning_rate": 2.6337398864871394e-05, "loss": 0.4205, "step": 3919, "task_loss": 0.9961596131324768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5397971868515015, "epoch": 3.31, "learning_rate": 2.6331360946745565e-05, "loss": 0.4885, "step": 3920, "task_loss": 0.667671263217926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35227346420288086, "epoch": 3.31, "learning_rate": 2.6325323028619732e-05, "loss": 0.5642, "step": 3921, "task_loss": 0.05814676731824875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5519534945487976, "epoch": 3.32, "learning_rate": 2.6319285110493903e-05, "loss": 0.5021, "step": 3922, "task_loss": 1.044471025466919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4674166142940521, "epoch": 3.32, "learning_rate": 2.6313247192368073e-05, "loss": 0.431, "step": 3923, "task_loss": 1.5034842491149902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6058449745178223, "epoch": 3.32, "learning_rate": 2.6307209274242244e-05, "loss": 0.4521, "step": 3924, "task_loss": 1.7140250205993652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21890950202941895, "epoch": 3.32, "learning_rate": 2.630117135611641e-05, "loss": 0.3724, "step": 3925, "task_loss": 0.8589317202568054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5559444427490234, "epoch": 3.32, "learning_rate": 2.629513343799058e-05, "loss": 0.4749, "step": 3926, "task_loss": 0.7565250396728516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35041147470474243, "epoch": 3.32, "learning_rate": 2.6289095519864755e-05, "loss": 0.4314, "step": 3927, "task_loss": 0.31393322348594666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4559428095817566, "epoch": 3.32, "learning_rate": 2.628305760173892e-05, "loss": 0.362, "step": 3928, "task_loss": 0.36873114109039307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5369764566421509, "epoch": 3.32, "learning_rate": 2.627701968361309e-05, "loss": 0.5304, "step": 3929, "task_loss": 0.41969671845436096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3492605686187744, "epoch": 3.32, "learning_rate": 2.6270981765487264e-05, "loss": 0.4066, "step": 3930, "task_loss": 0.5743944048881531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3832828998565674, "epoch": 3.32, "learning_rate": 2.6264943847361427e-05, "loss": 0.5658, "step": 3931, "task_loss": 0.3702782988548279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.66197669506073, "epoch": 3.32, "learning_rate": 2.62589059292356e-05, "loss": 0.453, "step": 3932, "task_loss": 0.14136435091495514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7945173382759094, "epoch": 3.32, "learning_rate": 2.6252868011109772e-05, "loss": 0.5631, "step": 3933, "task_loss": 1.1786843538284302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6458139419555664, "epoch": 3.33, "learning_rate": 2.6246830092983943e-05, "loss": 0.4151, "step": 3934, "task_loss": 0.9562245011329651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4598050117492676, "epoch": 3.33, "learning_rate": 2.624079217485811e-05, "loss": 0.5026, "step": 3935, "task_loss": 0.5333450436592102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5577046871185303, "epoch": 3.33, "learning_rate": 2.623475425673228e-05, "loss": 0.6408, "step": 3936, "task_loss": 1.101917028427124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30583178997039795, "epoch": 3.33, "learning_rate": 2.622871633860645e-05, "loss": 0.4103, "step": 3937, "task_loss": 0.25704365968704224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38211584091186523, "epoch": 3.33, "learning_rate": 2.6222678420480618e-05, "loss": 0.3993, "step": 3938, "task_loss": 0.6111950874328613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23534807562828064, "epoch": 3.33, "learning_rate": 2.621664050235479e-05, "loss": 0.2814, "step": 3939, "task_loss": 0.3402695953845978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6940446496009827, "epoch": 3.33, "learning_rate": 2.621060258422896e-05, "loss": 0.5475, "step": 3940, "task_loss": 0.31768837571144104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3031970262527466, "epoch": 3.33, "learning_rate": 2.6204564666103126e-05, "loss": 0.4661, "step": 3941, "task_loss": 0.8607670068740845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5796726942062378, "epoch": 3.33, "learning_rate": 2.6198526747977297e-05, "loss": 0.4711, "step": 3942, "task_loss": 1.0236773490905762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27072980999946594, "epoch": 3.33, "learning_rate": 2.6192488829851468e-05, "loss": 0.3737, "step": 3943, "task_loss": 0.09534290432929993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39296406507492065, "epoch": 3.33, "learning_rate": 2.618645091172564e-05, "loss": 0.3253, "step": 3944, "task_loss": 1.1961030960083008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2657291293144226, "epoch": 3.33, "learning_rate": 2.6180412993599805e-05, "loss": 0.4009, "step": 3945, "task_loss": 0.9193987250328064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5617117285728455, "epoch": 3.34, "learning_rate": 2.617437507547398e-05, "loss": 0.4531, "step": 3946, "task_loss": 0.7640877962112427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3065324127674103, "epoch": 3.34, "learning_rate": 2.616833715734815e-05, "loss": 0.403, "step": 3947, "task_loss": 0.08209220319986343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18636533617973328, "epoch": 3.34, "learning_rate": 2.6162299239222317e-05, "loss": 0.4545, "step": 3948, "task_loss": 0.4437941908836365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3777286112308502, "epoch": 3.34, "learning_rate": 2.6156261321096488e-05, "loss": 0.4714, "step": 3949, "task_loss": 0.5307972431182861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.284534215927124, "epoch": 3.34, "learning_rate": 2.6150223402970658e-05, "loss": 0.6217, "step": 3950, "task_loss": 0.3686329424381256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36853039264678955, "epoch": 3.34, "learning_rate": 2.6144185484844825e-05, "loss": 0.2979, "step": 3951, "task_loss": 0.8223134279251099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8026624917984009, "epoch": 3.34, "learning_rate": 2.6138147566718996e-05, "loss": 0.488, "step": 3952, "task_loss": 1.2354662418365479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6630476713180542, "epoch": 3.34, "learning_rate": 2.6132109648593166e-05, "loss": 0.5367, "step": 3953, "task_loss": 0.28533416986465454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47065824270248413, "epoch": 3.34, "learning_rate": 2.6126071730467337e-05, "loss": 0.3471, "step": 3954, "task_loss": 0.37911272048950195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3665674924850464, "epoch": 3.34, "learning_rate": 2.6120033812341504e-05, "loss": 0.5027, "step": 3955, "task_loss": 0.972766637802124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3628494143486023, "epoch": 3.34, "learning_rate": 2.6113995894215675e-05, "loss": 0.4444, "step": 3956, "task_loss": 0.6082914471626282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46786653995513916, "epoch": 3.34, "learning_rate": 2.610795797608985e-05, "loss": 0.4881, "step": 3957, "task_loss": 0.7733606696128845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37900471687316895, "epoch": 3.35, "learning_rate": 2.6101920057964013e-05, "loss": 0.4803, "step": 3958, "task_loss": 0.9288731217384338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5493066906929016, "epoch": 3.35, "learning_rate": 2.6095882139838183e-05, "loss": 0.5613, "step": 3959, "task_loss": 0.2994481325149536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29829126596450806, "epoch": 3.35, "learning_rate": 2.6089844221712357e-05, "loss": 0.4049, "step": 3960, "task_loss": 0.053297512233257294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39277398586273193, "epoch": 3.35, "learning_rate": 2.608380630358652e-05, "loss": 0.4991, "step": 3961, "task_loss": 0.5253458619117737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2920752763748169, "epoch": 3.35, "learning_rate": 2.6077768385460695e-05, "loss": 0.3523, "step": 3962, "task_loss": 0.5977457165718079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3123880624771118, "epoch": 3.35, "learning_rate": 2.6071730467334865e-05, "loss": 0.4951, "step": 3963, "task_loss": 1.1253337860107422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4913237690925598, "epoch": 3.35, "learning_rate": 2.6065692549209036e-05, "loss": 0.5225, "step": 3964, "task_loss": 1.0267446041107178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7077473402023315, "epoch": 3.35, "learning_rate": 2.6059654631083203e-05, "loss": 0.5156, "step": 3965, "task_loss": 0.602492094039917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4862809181213379, "epoch": 3.35, "learning_rate": 2.6053616712957374e-05, "loss": 0.3782, "step": 3966, "task_loss": 0.4878157079219818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6093763113021851, "epoch": 3.35, "learning_rate": 2.6047578794831544e-05, "loss": 0.4774, "step": 3967, "task_loss": 1.9639968872070312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45842796564102173, "epoch": 3.35, "learning_rate": 2.604154087670571e-05, "loss": 0.5411, "step": 3968, "task_loss": 1.160327434539795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3603624105453491, "epoch": 3.35, "learning_rate": 2.6035502958579882e-05, "loss": 0.7176, "step": 3969, "task_loss": 1.0591529607772827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2988588809967041, "epoch": 3.36, "learning_rate": 2.6029465040454053e-05, "loss": 0.4222, "step": 3970, "task_loss": 0.06635525822639465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5966510772705078, "epoch": 3.36, "learning_rate": 2.602342712232822e-05, "loss": 0.4731, "step": 3971, "task_loss": 1.3770787715911865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40883147716522217, "epoch": 3.36, "learning_rate": 2.601738920420239e-05, "loss": 0.4895, "step": 3972, "task_loss": 0.4181012213230133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35842689871788025, "epoch": 3.36, "learning_rate": 2.6011351286076564e-05, "loss": 0.5078, "step": 3973, "task_loss": 0.3913821578025818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3882879614830017, "epoch": 3.36, "learning_rate": 2.6005313367950735e-05, "loss": 0.3949, "step": 3974, "task_loss": 0.8234023451805115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.245476633310318, "epoch": 3.36, "learning_rate": 2.59992754498249e-05, "loss": 0.3781, "step": 3975, "task_loss": 0.7674228549003601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.504243016242981, "epoch": 3.36, "learning_rate": 2.5993237531699073e-05, "loss": 0.4886, "step": 3976, "task_loss": 0.3125545382499695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32846009731292725, "epoch": 3.36, "learning_rate": 2.5987199613573243e-05, "loss": 0.4252, "step": 3977, "task_loss": 0.678061306476593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.356419175863266, "epoch": 3.36, "learning_rate": 2.598116169544741e-05, "loss": 0.4716, "step": 3978, "task_loss": 0.5723683834075928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3754366636276245, "epoch": 3.36, "learning_rate": 2.597512377732158e-05, "loss": 0.5131, "step": 3979, "task_loss": 0.5808195471763611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4903452694416046, "epoch": 3.36, "learning_rate": 2.596908585919575e-05, "loss": 0.506, "step": 3980, "task_loss": 0.33252057433128357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20172345638275146, "epoch": 3.36, "learning_rate": 2.596304794106992e-05, "loss": 0.3977, "step": 3981, "task_loss": 0.18446749448776245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3938625454902649, "epoch": 3.37, "learning_rate": 2.595701002294409e-05, "loss": 0.3993, "step": 3982, "task_loss": 0.15423938632011414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40341082215309143, "epoch": 3.37, "learning_rate": 2.595097210481826e-05, "loss": 0.437, "step": 3983, "task_loss": 0.625878095626831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6512993574142456, "epoch": 3.37, "learning_rate": 2.5944934186692434e-05, "loss": 0.5717, "step": 3984, "task_loss": 2.4081974029541016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3270098567008972, "epoch": 3.37, "learning_rate": 2.5938896268566598e-05, "loss": 0.4602, "step": 3985, "task_loss": 0.26746249198913574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4951297342777252, "epoch": 3.37, "learning_rate": 2.5932858350440768e-05, "loss": 0.4552, "step": 3986, "task_loss": 1.4795302152633667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26534271240234375, "epoch": 3.37, "learning_rate": 2.5926820432314942e-05, "loss": 0.373, "step": 3987, "task_loss": 0.3624575436115265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2847496271133423, "epoch": 3.37, "learning_rate": 2.5920782514189106e-05, "loss": 0.4854, "step": 3988, "task_loss": 0.2905990183353424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.502437949180603, "epoch": 3.37, "learning_rate": 2.591474459606328e-05, "loss": 0.4349, "step": 3989, "task_loss": 0.5955048203468323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5499303340911865, "epoch": 3.37, "learning_rate": 2.590870667793745e-05, "loss": 0.5782, "step": 3990, "task_loss": 0.9382873177528381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5605165362358093, "epoch": 3.37, "learning_rate": 2.5902668759811614e-05, "loss": 0.4834, "step": 3991, "task_loss": 0.48812568187713623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3969873785972595, "epoch": 3.37, "learning_rate": 2.5896630841685788e-05, "loss": 0.412, "step": 3992, "task_loss": 0.5489519834518433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4259173274040222, "epoch": 3.38, "learning_rate": 2.589059292355996e-05, "loss": 0.442, "step": 3993, "task_loss": 0.41963398456573486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21472740173339844, "epoch": 3.38, "learning_rate": 2.588455500543413e-05, "loss": 0.4462, "step": 3994, "task_loss": 0.20996612310409546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32655590772628784, "epoch": 3.38, "learning_rate": 2.5878517087308297e-05, "loss": 0.4903, "step": 3995, "task_loss": 0.6842656135559082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4879166781902313, "epoch": 3.38, "learning_rate": 2.5872479169182467e-05, "loss": 0.5153, "step": 3996, "task_loss": 0.500050961971283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43237537145614624, "epoch": 3.38, "learning_rate": 2.5866441251056638e-05, "loss": 0.4249, "step": 3997, "task_loss": 1.341585636138916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2903965413570404, "epoch": 3.38, "learning_rate": 2.5860403332930805e-05, "loss": 0.41, "step": 3998, "task_loss": 0.6507741808891296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4390353560447693, "epoch": 3.38, "learning_rate": 2.5854365414804975e-05, "loss": 0.4655, "step": 3999, "task_loss": 0.48864781856536865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29041317105293274, "epoch": 3.38, "learning_rate": 2.584832749667915e-05, "loss": 0.3949, "step": 4000, "task_loss": 0.700737714767456 }, { "epoch": 3.38, "eval_accuracy": 0.9078811881188119, "eval_loss": 0.2838531732559204, "eval_runtime": 318.9403, "eval_samples_per_second": 79.168, "eval_steps_per_second": 0.621, "step": 4000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5358077883720398, "epoch": 3.38, "learning_rate": 2.5842289578553313e-05, "loss": 0.4389, "step": 4001, "task_loss": 0.7069559097290039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5776588320732117, "epoch": 3.38, "learning_rate": 2.5836251660427484e-05, "loss": 0.4192, "step": 4002, "task_loss": 0.6551102995872498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6500595808029175, "epoch": 3.38, "learning_rate": 2.5830213742301658e-05, "loss": 0.4515, "step": 4003, "task_loss": 0.8764891624450684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4348542392253876, "epoch": 3.38, "learning_rate": 2.582417582417583e-05, "loss": 0.3825, "step": 4004, "task_loss": 0.8104791641235352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5905099511146545, "epoch": 3.39, "learning_rate": 2.5818137906049996e-05, "loss": 0.5288, "step": 4005, "task_loss": 0.9732226133346558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39015620946884155, "epoch": 3.39, "learning_rate": 2.5812099987924166e-05, "loss": 0.3415, "step": 4006, "task_loss": 0.6573636531829834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.037064552307129, "epoch": 3.39, "learning_rate": 2.5806062069798337e-05, "loss": 0.5585, "step": 4007, "task_loss": 0.3677390217781067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5325114130973816, "epoch": 3.39, "learning_rate": 2.5800024151672504e-05, "loss": 0.5266, "step": 4008, "task_loss": 0.6213597655296326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.693821907043457, "epoch": 3.39, "learning_rate": 2.5793986233546674e-05, "loss": 0.5475, "step": 4009, "task_loss": 0.35858967900276184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3228412866592407, "epoch": 3.39, "learning_rate": 2.5787948315420845e-05, "loss": 0.3828, "step": 4010, "task_loss": 0.4227518141269684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46212369203567505, "epoch": 3.39, "learning_rate": 2.5781910397295012e-05, "loss": 0.378, "step": 4011, "task_loss": 0.5746100544929504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32506972551345825, "epoch": 3.39, "learning_rate": 2.5775872479169183e-05, "loss": 0.4065, "step": 4012, "task_loss": 0.8649409413337708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48109641671180725, "epoch": 3.39, "learning_rate": 2.5769834561043353e-05, "loss": 0.609, "step": 4013, "task_loss": 2.1062211990356445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7927541136741638, "epoch": 3.39, "learning_rate": 2.5763796642917527e-05, "loss": 0.6549, "step": 4014, "task_loss": 0.8568188548088074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.789509654045105, "epoch": 3.39, "learning_rate": 2.575775872479169e-05, "loss": 0.5583, "step": 4015, "task_loss": 1.0007482767105103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7370916604995728, "epoch": 3.39, "learning_rate": 2.575172080666586e-05, "loss": 0.4744, "step": 4016, "task_loss": 0.7632065415382385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45685434341430664, "epoch": 3.4, "learning_rate": 2.5745682888540036e-05, "loss": 0.4484, "step": 4017, "task_loss": 0.26755595207214355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3917536735534668, "epoch": 3.4, "learning_rate": 2.57396449704142e-05, "loss": 0.5348, "step": 4018, "task_loss": 0.5080838203430176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47289276123046875, "epoch": 3.4, "learning_rate": 2.5733607052288373e-05, "loss": 0.5065, "step": 4019, "task_loss": 1.0941957235336304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5357720851898193, "epoch": 3.4, "learning_rate": 2.5727569134162544e-05, "loss": 0.4133, "step": 4020, "task_loss": 0.34930291771888733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46537530422210693, "epoch": 3.4, "learning_rate": 2.572153121603671e-05, "loss": 0.5077, "step": 4021, "task_loss": 0.5046914219856262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3924690783023834, "epoch": 3.4, "learning_rate": 2.5715493297910882e-05, "loss": 0.633, "step": 4022, "task_loss": 0.3602185845375061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25308725237846375, "epoch": 3.4, "learning_rate": 2.5709455379785052e-05, "loss": 0.4482, "step": 4023, "task_loss": 0.1780484914779663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6798350214958191, "epoch": 3.4, "learning_rate": 2.5703417461659223e-05, "loss": 0.5507, "step": 4024, "task_loss": 0.6738662719726562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37544792890548706, "epoch": 3.4, "learning_rate": 2.569737954353339e-05, "loss": 0.4753, "step": 4025, "task_loss": 0.5693057775497437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4914456307888031, "epoch": 3.4, "learning_rate": 2.569134162540756e-05, "loss": 0.541, "step": 4026, "task_loss": 0.7014830112457275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45919549465179443, "epoch": 3.4, "learning_rate": 2.568530370728173e-05, "loss": 0.5328, "step": 4027, "task_loss": 1.279834508895874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37162601947784424, "epoch": 3.4, "learning_rate": 2.56792657891559e-05, "loss": 0.4489, "step": 4028, "task_loss": 1.2803676128387451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4523841142654419, "epoch": 3.41, "learning_rate": 2.567322787103007e-05, "loss": 0.396, "step": 4029, "task_loss": 1.0253190994262695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.367323100566864, "epoch": 3.41, "learning_rate": 2.5667189952904243e-05, "loss": 0.4948, "step": 4030, "task_loss": 0.40665867924690247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38436606526374817, "epoch": 3.41, "learning_rate": 2.5661152034778407e-05, "loss": 0.4786, "step": 4031, "task_loss": 0.33141157031059265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3912968933582306, "epoch": 3.41, "learning_rate": 2.5655114116652577e-05, "loss": 0.4528, "step": 4032, "task_loss": 0.64816814661026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4542107582092285, "epoch": 3.41, "learning_rate": 2.564907619852675e-05, "loss": 0.4212, "step": 4033, "task_loss": 0.3282413184642792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3605651259422302, "epoch": 3.41, "learning_rate": 2.5643038280400922e-05, "loss": 0.4477, "step": 4034, "task_loss": 1.7820360660552979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5463732481002808, "epoch": 3.41, "learning_rate": 2.563700036227509e-05, "loss": 0.4625, "step": 4035, "task_loss": 1.0151199102401733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5436286330223083, "epoch": 3.41, "learning_rate": 2.563096244414926e-05, "loss": 0.414, "step": 4036, "task_loss": 1.1057696342468262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5385617017745972, "epoch": 3.41, "learning_rate": 2.562492452602343e-05, "loss": 0.4778, "step": 4037, "task_loss": 0.32790130376815796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7293095588684082, "epoch": 3.41, "learning_rate": 2.5618886607897597e-05, "loss": 0.5721, "step": 4038, "task_loss": 1.2795405387878418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32933127880096436, "epoch": 3.41, "learning_rate": 2.5612848689771768e-05, "loss": 0.4155, "step": 4039, "task_loss": 1.0040210485458374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22331589460372925, "epoch": 3.41, "learning_rate": 2.560681077164594e-05, "loss": 0.303, "step": 4040, "task_loss": 0.3825274705886841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6016868948936462, "epoch": 3.42, "learning_rate": 2.5600772853520106e-05, "loss": 0.504, "step": 4041, "task_loss": 1.0376375913619995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3244531452655792, "epoch": 3.42, "learning_rate": 2.5594734935394276e-05, "loss": 0.4676, "step": 4042, "task_loss": 0.27999818325042725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34826868772506714, "epoch": 3.42, "learning_rate": 2.5588697017268447e-05, "loss": 0.4714, "step": 4043, "task_loss": 1.071108102798462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4084356129169464, "epoch": 3.42, "learning_rate": 2.558265909914262e-05, "loss": 0.3832, "step": 4044, "task_loss": 0.13053731620311737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5513164401054382, "epoch": 3.42, "learning_rate": 2.5576621181016785e-05, "loss": 0.404, "step": 4045, "task_loss": 0.4364088177680969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43724358081817627, "epoch": 3.42, "learning_rate": 2.557058326289096e-05, "loss": 0.3921, "step": 4046, "task_loss": 0.8366675972938538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45288553833961487, "epoch": 3.42, "learning_rate": 2.556454534476513e-05, "loss": 0.4625, "step": 4047, "task_loss": 0.676979124546051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3728858232498169, "epoch": 3.42, "learning_rate": 2.5558507426639293e-05, "loss": 0.4154, "step": 4048, "task_loss": 0.3735004663467407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4070954918861389, "epoch": 3.42, "learning_rate": 2.5552469508513467e-05, "loss": 0.4798, "step": 4049, "task_loss": 0.33586418628692627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3026151657104492, "epoch": 3.42, "learning_rate": 2.5546431590387637e-05, "loss": 0.499, "step": 4050, "task_loss": 0.6694821119308472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49613410234451294, "epoch": 3.42, "learning_rate": 2.5540393672261805e-05, "loss": 0.5048, "step": 4051, "task_loss": 1.4746835231781006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37304627895355225, "epoch": 3.42, "learning_rate": 2.5534355754135975e-05, "loss": 0.4563, "step": 4052, "task_loss": 0.5727298855781555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4977983236312866, "epoch": 3.43, "learning_rate": 2.5528317836010146e-05, "loss": 0.4174, "step": 4053, "task_loss": 0.639698326587677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5163803100585938, "epoch": 3.43, "learning_rate": 2.5522279917884313e-05, "loss": 0.4646, "step": 4054, "task_loss": 0.46796947717666626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4531753957271576, "epoch": 3.43, "learning_rate": 2.5516241999758483e-05, "loss": 0.4374, "step": 4055, "task_loss": 0.5997196435928345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3633864223957062, "epoch": 3.43, "learning_rate": 2.5510204081632654e-05, "loss": 0.6105, "step": 4056, "task_loss": 0.7658596634864807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3204787075519562, "epoch": 3.43, "learning_rate": 2.5504166163506828e-05, "loss": 0.4049, "step": 4057, "task_loss": 0.4774383306503296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4575861096382141, "epoch": 3.43, "learning_rate": 2.5498128245380992e-05, "loss": 0.3928, "step": 4058, "task_loss": 0.5776827931404114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38638603687286377, "epoch": 3.43, "learning_rate": 2.5492090327255162e-05, "loss": 0.3832, "step": 4059, "task_loss": 0.98641437292099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3699021339416504, "epoch": 3.43, "learning_rate": 2.5486052409129336e-05, "loss": 0.4019, "step": 4060, "task_loss": 0.27561619877815247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24132807552814484, "epoch": 3.43, "learning_rate": 2.54800144910035e-05, "loss": 0.3462, "step": 4061, "task_loss": 0.14319607615470886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20079445838928223, "epoch": 3.43, "learning_rate": 2.5473976572877674e-05, "loss": 0.4463, "step": 4062, "task_loss": 0.6595559120178223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.730699896812439, "epoch": 3.43, "learning_rate": 2.5467938654751845e-05, "loss": 0.4433, "step": 4063, "task_loss": 0.4659324586391449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34289103746414185, "epoch": 3.44, "learning_rate": 2.546190073662601e-05, "loss": 0.4053, "step": 4064, "task_loss": 0.25328466296195984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4317112863063812, "epoch": 3.44, "learning_rate": 2.5455862818500182e-05, "loss": 0.4519, "step": 4065, "task_loss": 0.3326265215873718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48618945479393005, "epoch": 3.44, "learning_rate": 2.5449824900374353e-05, "loss": 0.4823, "step": 4066, "task_loss": 0.7667815685272217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7732630968093872, "epoch": 3.44, "learning_rate": 2.5443786982248524e-05, "loss": 0.535, "step": 4067, "task_loss": 1.3694840669631958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35516658425331116, "epoch": 3.44, "learning_rate": 2.543774906412269e-05, "loss": 0.513, "step": 4068, "task_loss": 0.47205081582069397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42593684792518616, "epoch": 3.44, "learning_rate": 2.543171114599686e-05, "loss": 0.4562, "step": 4069, "task_loss": 0.16923226416110992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3349229693412781, "epoch": 3.44, "learning_rate": 2.5425673227871032e-05, "loss": 0.4509, "step": 4070, "task_loss": 0.8104509115219116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.582404375076294, "epoch": 3.44, "learning_rate": 2.54196353097452e-05, "loss": 0.4403, "step": 4071, "task_loss": 1.745675802230835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35391777753829956, "epoch": 3.44, "learning_rate": 2.541359739161937e-05, "loss": 0.4471, "step": 4072, "task_loss": 1.2795908451080322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6049908399581909, "epoch": 3.44, "learning_rate": 2.540755947349354e-05, "loss": 0.4597, "step": 4073, "task_loss": 0.72719806432724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3972572088241577, "epoch": 3.44, "learning_rate": 2.5401521555367707e-05, "loss": 0.3889, "step": 4074, "task_loss": 0.7305880188941956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37300562858581543, "epoch": 3.44, "learning_rate": 2.5395483637241878e-05, "loss": 0.4887, "step": 4075, "task_loss": 1.4905622005462646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27409934997558594, "epoch": 3.45, "learning_rate": 2.5389445719116052e-05, "loss": 0.2843, "step": 4076, "task_loss": 0.42888256907463074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5826777219772339, "epoch": 3.45, "learning_rate": 2.5383407800990222e-05, "loss": 0.4448, "step": 4077, "task_loss": 0.9647121429443359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30868369340896606, "epoch": 3.45, "learning_rate": 2.537736988286439e-05, "loss": 0.4909, "step": 4078, "task_loss": 0.73536217212677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4030818045139313, "epoch": 3.45, "learning_rate": 2.537133196473856e-05, "loss": 0.4099, "step": 4079, "task_loss": 0.8158953189849854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39338839054107666, "epoch": 3.45, "learning_rate": 2.536529404661273e-05, "loss": 0.4997, "step": 4080, "task_loss": 0.15156243741512299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31900835037231445, "epoch": 3.45, "learning_rate": 2.5359256128486898e-05, "loss": 0.374, "step": 4081, "task_loss": 1.151689052581787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6962835192680359, "epoch": 3.45, "learning_rate": 2.535321821036107e-05, "loss": 0.6648, "step": 4082, "task_loss": 1.295904517173767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34238407015800476, "epoch": 3.45, "learning_rate": 2.534718029223524e-05, "loss": 0.4013, "step": 4083, "task_loss": 0.25396353006362915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2826879024505615, "epoch": 3.45, "learning_rate": 2.5341142374109406e-05, "loss": 0.492, "step": 4084, "task_loss": 0.49424418807029724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37059730291366577, "epoch": 3.45, "learning_rate": 2.5335104455983577e-05, "loss": 0.3596, "step": 4085, "task_loss": 0.764437198638916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3325430154800415, "epoch": 3.45, "learning_rate": 2.5329066537857747e-05, "loss": 0.4144, "step": 4086, "task_loss": 0.6910301446914673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4724382758140564, "epoch": 3.45, "learning_rate": 2.532302861973192e-05, "loss": 0.3967, "step": 4087, "task_loss": 0.49892276525497437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5692050457000732, "epoch": 3.46, "learning_rate": 2.5316990701606085e-05, "loss": 0.6624, "step": 4088, "task_loss": 0.7348126173019409 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3876528739929199, "epoch": 3.46, "learning_rate": 2.5310952783480256e-05, "loss": 0.3485, "step": 4089, "task_loss": 0.5500836372375488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35622119903564453, "epoch": 3.46, "learning_rate": 2.530491486535443e-05, "loss": 0.3909, "step": 4090, "task_loss": 1.560074806213379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29115885496139526, "epoch": 3.46, "learning_rate": 2.5298876947228594e-05, "loss": 0.4644, "step": 4091, "task_loss": 0.36166074872016907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4171428680419922, "epoch": 3.46, "learning_rate": 2.5292839029102767e-05, "loss": 0.5382, "step": 4092, "task_loss": 0.7752326726913452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5782561302185059, "epoch": 3.46, "learning_rate": 2.5286801110976938e-05, "loss": 0.4253, "step": 4093, "task_loss": 0.602676510810852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2677343189716339, "epoch": 3.46, "learning_rate": 2.5280763192851102e-05, "loss": 0.4128, "step": 4094, "task_loss": 0.2722078263759613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40810149908065796, "epoch": 3.46, "learning_rate": 2.5274725274725276e-05, "loss": 0.4554, "step": 4095, "task_loss": 1.6742973327636719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22833240032196045, "epoch": 3.46, "learning_rate": 2.5268687356599446e-05, "loss": 0.42, "step": 4096, "task_loss": 0.10234031826257706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30138906836509705, "epoch": 3.46, "learning_rate": 2.5262649438473617e-05, "loss": 0.3531, "step": 4097, "task_loss": 0.29494139552116394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2624063193798065, "epoch": 3.46, "learning_rate": 2.5256611520347784e-05, "loss": 0.4333, "step": 4098, "task_loss": 0.4572010636329651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.2428979873657227, "epoch": 3.46, "learning_rate": 2.5250573602221955e-05, "loss": 0.6312, "step": 4099, "task_loss": 0.8496942520141602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2666168212890625, "epoch": 3.47, "learning_rate": 2.5244535684096125e-05, "loss": 0.4121, "step": 4100, "task_loss": 0.1668895184993744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5462213754653931, "epoch": 3.47, "learning_rate": 2.5238497765970292e-05, "loss": 0.589, "step": 4101, "task_loss": 0.3051339387893677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42921486496925354, "epoch": 3.47, "learning_rate": 2.5232459847844463e-05, "loss": 0.3798, "step": 4102, "task_loss": 0.4368909001350403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35626456141471863, "epoch": 3.47, "learning_rate": 2.5226421929718637e-05, "loss": 0.4903, "step": 4103, "task_loss": 0.8252670168876648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5845634341239929, "epoch": 3.47, "learning_rate": 2.52203840115928e-05, "loss": 0.4807, "step": 4104, "task_loss": 0.5948047041893005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42381173372268677, "epoch": 3.47, "learning_rate": 2.521434609346697e-05, "loss": 0.3801, "step": 4105, "task_loss": 0.4511117935180664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5668853521347046, "epoch": 3.47, "learning_rate": 2.5208308175341145e-05, "loss": 0.4654, "step": 4106, "task_loss": 0.5723603367805481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22313551604747772, "epoch": 3.47, "learning_rate": 2.5202270257215316e-05, "loss": 0.358, "step": 4107, "task_loss": 0.7811095118522644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6473349928855896, "epoch": 3.47, "learning_rate": 2.5196232339089483e-05, "loss": 0.4265, "step": 4108, "task_loss": 0.5270083546638489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7142409682273865, "epoch": 3.47, "learning_rate": 2.5190194420963654e-05, "loss": 0.4397, "step": 4109, "task_loss": 0.4688701927661896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4456367790699005, "epoch": 3.47, "learning_rate": 2.5184156502837824e-05, "loss": 0.3696, "step": 4110, "task_loss": 0.5032515525817871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5702639818191528, "epoch": 3.47, "learning_rate": 2.517811858471199e-05, "loss": 0.5026, "step": 4111, "task_loss": 0.5084189772605896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.529504120349884, "epoch": 3.48, "learning_rate": 2.5172080666586162e-05, "loss": 0.4059, "step": 4112, "task_loss": 0.13888682425022125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32843875885009766, "epoch": 3.48, "learning_rate": 2.5166042748460333e-05, "loss": 0.3234, "step": 4113, "task_loss": 0.859766960144043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4875431954860687, "epoch": 3.48, "learning_rate": 2.51600048303345e-05, "loss": 0.4464, "step": 4114, "task_loss": 0.2551010847091675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24725782871246338, "epoch": 3.48, "learning_rate": 2.515396691220867e-05, "loss": 0.3212, "step": 4115, "task_loss": 0.19866029918193817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8121551871299744, "epoch": 3.48, "learning_rate": 2.514792899408284e-05, "loss": 0.5466, "step": 4116, "task_loss": 1.2144768238067627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.337194561958313, "epoch": 3.48, "learning_rate": 2.5141891075957015e-05, "loss": 0.3806, "step": 4117, "task_loss": 0.5482938885688782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5421404838562012, "epoch": 3.48, "learning_rate": 2.513585315783118e-05, "loss": 0.4704, "step": 4118, "task_loss": 1.0548559427261353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.277373731136322, "epoch": 3.48, "learning_rate": 2.5129815239705353e-05, "loss": 0.3712, "step": 4119, "task_loss": 1.143834114074707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35789236426353455, "epoch": 3.48, "learning_rate": 2.5123777321579523e-05, "loss": 0.4168, "step": 4120, "task_loss": 0.8498877882957458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40319404006004333, "epoch": 3.48, "learning_rate": 2.5117739403453687e-05, "loss": 0.5295, "step": 4121, "task_loss": 0.6738725900650024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5094547271728516, "epoch": 3.48, "learning_rate": 2.511170148532786e-05, "loss": 0.4534, "step": 4122, "task_loss": 0.2929881513118744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38450345396995544, "epoch": 3.48, "learning_rate": 2.510566356720203e-05, "loss": 0.339, "step": 4123, "task_loss": 0.5582667589187622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43607163429260254, "epoch": 3.49, "learning_rate": 2.50996256490762e-05, "loss": 0.4321, "step": 4124, "task_loss": 0.2486065924167633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4616132080554962, "epoch": 3.49, "learning_rate": 2.509358773095037e-05, "loss": 0.4431, "step": 4125, "task_loss": 0.8427473306655884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48816365003585815, "epoch": 3.49, "learning_rate": 2.508754981282454e-05, "loss": 0.4491, "step": 4126, "task_loss": 0.9274172186851501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2371133267879486, "epoch": 3.49, "learning_rate": 2.508151189469871e-05, "loss": 0.3598, "step": 4127, "task_loss": 0.1185678094625473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28138816356658936, "epoch": 3.49, "learning_rate": 2.5075473976572878e-05, "loss": 0.4112, "step": 4128, "task_loss": 0.1809827983379364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47016775608062744, "epoch": 3.49, "learning_rate": 2.5069436058447048e-05, "loss": 0.5281, "step": 4129, "task_loss": 0.2583012282848358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42749184370040894, "epoch": 3.49, "learning_rate": 2.506339814032122e-05, "loss": 0.3794, "step": 4130, "task_loss": 0.35947489738464355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5594605207443237, "epoch": 3.49, "learning_rate": 2.5057360222195386e-05, "loss": 0.4209, "step": 4131, "task_loss": 0.9739482402801514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48497915267944336, "epoch": 3.49, "learning_rate": 2.5051322304069556e-05, "loss": 0.3455, "step": 4132, "task_loss": 0.6561703085899353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5665384531021118, "epoch": 3.49, "learning_rate": 2.504528438594373e-05, "loss": 0.5274, "step": 4133, "task_loss": 0.7692103981971741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24694278836250305, "epoch": 3.49, "learning_rate": 2.5039246467817894e-05, "loss": 0.4679, "step": 4134, "task_loss": 0.20865769684314728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27790069580078125, "epoch": 3.5, "learning_rate": 2.5033208549692068e-05, "loss": 0.4362, "step": 4135, "task_loss": 0.4595635235309601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39106935262680054, "epoch": 3.5, "learning_rate": 2.502717063156624e-05, "loss": 0.3928, "step": 4136, "task_loss": 0.8336305618286133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32807350158691406, "epoch": 3.5, "learning_rate": 2.502113271344041e-05, "loss": 0.3869, "step": 4137, "task_loss": 0.27206867933273315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2517324686050415, "epoch": 3.5, "learning_rate": 2.5015094795314576e-05, "loss": 0.4029, "step": 4138, "task_loss": 0.24441707134246826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5515205264091492, "epoch": 3.5, "learning_rate": 2.5009056877188747e-05, "loss": 0.5683, "step": 4139, "task_loss": 0.6224509477615356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41551434993743896, "epoch": 3.5, "learning_rate": 2.5003018959062918e-05, "loss": 0.6178, "step": 4140, "task_loss": 0.3465399742126465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8326511383056641, "epoch": 3.5, "learning_rate": 2.4996981040937088e-05, "loss": 0.441, "step": 4141, "task_loss": 0.5231512188911438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35185712575912476, "epoch": 3.5, "learning_rate": 2.4990943122811255e-05, "loss": 0.3821, "step": 4142, "task_loss": 0.9748257994651794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2846475839614868, "epoch": 3.5, "learning_rate": 2.4984905204685426e-05, "loss": 0.4315, "step": 4143, "task_loss": 0.034726522862911224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4139280617237091, "epoch": 3.5, "learning_rate": 2.4978867286559597e-05, "loss": 0.3786, "step": 4144, "task_loss": 0.574490487575531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35014408826828003, "epoch": 3.5, "learning_rate": 2.4972829368433764e-05, "loss": 0.3891, "step": 4145, "task_loss": 0.48398175835609436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40835708379745483, "epoch": 3.5, "learning_rate": 2.4966791450307934e-05, "loss": 0.3912, "step": 4146, "task_loss": 0.9589718580245972 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5052321553230286, "epoch": 3.51, "learning_rate": 2.4960753532182105e-05, "loss": 0.4764, "step": 4147, "task_loss": 0.9470455050468445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5461833477020264, "epoch": 3.51, "learning_rate": 2.4954715614056272e-05, "loss": 0.5397, "step": 4148, "task_loss": 0.6754130125045776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5952012538909912, "epoch": 3.51, "learning_rate": 2.4948677695930446e-05, "loss": 0.4089, "step": 4149, "task_loss": 0.19444644451141357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48436856269836426, "epoch": 3.51, "learning_rate": 2.4942639777804613e-05, "loss": 0.5104, "step": 4150, "task_loss": 1.5825121402740479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2705532908439636, "epoch": 3.51, "learning_rate": 2.4936601859678784e-05, "loss": 0.5852, "step": 4151, "task_loss": 0.13943296670913696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6059427261352539, "epoch": 3.51, "learning_rate": 2.4930563941552954e-05, "loss": 0.5276, "step": 4152, "task_loss": 0.06787481158971786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35149678587913513, "epoch": 3.51, "learning_rate": 2.492452602342712e-05, "loss": 0.4485, "step": 4153, "task_loss": 1.200256109237671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5006035566329956, "epoch": 3.51, "learning_rate": 2.4918488105301292e-05, "loss": 0.4649, "step": 4154, "task_loss": 0.44053512811660767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46721553802490234, "epoch": 3.51, "learning_rate": 2.4912450187175463e-05, "loss": 0.3976, "step": 4155, "task_loss": 0.26289451122283936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31091299653053284, "epoch": 3.51, "learning_rate": 2.4906412269049633e-05, "loss": 0.4679, "step": 4156, "task_loss": 0.3645521104335785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6811827421188354, "epoch": 3.51, "learning_rate": 2.4900374350923804e-05, "loss": 0.4378, "step": 4157, "task_loss": 0.5220677852630615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4901278614997864, "epoch": 3.51, "learning_rate": 2.489433643279797e-05, "loss": 0.506, "step": 4158, "task_loss": 1.0347304344177246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5394182801246643, "epoch": 3.52, "learning_rate": 2.488829851467214e-05, "loss": 0.4102, "step": 4159, "task_loss": 0.5393096804618835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5971667766571045, "epoch": 3.52, "learning_rate": 2.4882260596546312e-05, "loss": 0.4957, "step": 4160, "task_loss": 0.6841007471084595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5364221334457397, "epoch": 3.52, "learning_rate": 2.4876222678420483e-05, "loss": 0.6719, "step": 4161, "task_loss": 1.1949772834777832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.491979718208313, "epoch": 3.52, "learning_rate": 2.487018476029465e-05, "loss": 0.4646, "step": 4162, "task_loss": 0.911540687084198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29036808013916016, "epoch": 3.52, "learning_rate": 2.486414684216882e-05, "loss": 0.472, "step": 4163, "task_loss": 0.649093747138977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.757514238357544, "epoch": 3.52, "learning_rate": 2.485810892404299e-05, "loss": 0.5784, "step": 4164, "task_loss": 0.6331304907798767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6435639262199402, "epoch": 3.52, "learning_rate": 2.485207100591716e-05, "loss": 0.5118, "step": 4165, "task_loss": 1.2262697219848633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33983007073402405, "epoch": 3.52, "learning_rate": 2.4846033087791332e-05, "loss": 0.4978, "step": 4166, "task_loss": 0.27342158555984497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5834276676177979, "epoch": 3.52, "learning_rate": 2.48399951696655e-05, "loss": 0.5572, "step": 4167, "task_loss": 1.1426045894622803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3058737814426422, "epoch": 3.52, "learning_rate": 2.483395725153967e-05, "loss": 0.4152, "step": 4168, "task_loss": 0.24060367047786713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46597519516944885, "epoch": 3.52, "learning_rate": 2.482791933341384e-05, "loss": 0.3734, "step": 4169, "task_loss": 0.16853992640972137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6405997276306152, "epoch": 3.52, "learning_rate": 2.4821881415288008e-05, "loss": 0.4001, "step": 4170, "task_loss": 0.7191398739814758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36974626779556274, "epoch": 3.53, "learning_rate": 2.481584349716218e-05, "loss": 0.4058, "step": 4171, "task_loss": 0.4299275577068329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5874122381210327, "epoch": 3.53, "learning_rate": 2.480980557903635e-05, "loss": 0.5023, "step": 4172, "task_loss": 1.3375029563903809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6342226266860962, "epoch": 3.53, "learning_rate": 2.480376766091052e-05, "loss": 0.742, "step": 4173, "task_loss": 1.6991150379180908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7013994455337524, "epoch": 3.53, "learning_rate": 2.479772974278469e-05, "loss": 0.5168, "step": 4174, "task_loss": 1.489614486694336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44349557161331177, "epoch": 3.53, "learning_rate": 2.4791691824658857e-05, "loss": 0.4637, "step": 4175, "task_loss": 0.8531488180160522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5182134509086609, "epoch": 3.53, "learning_rate": 2.478565390653303e-05, "loss": 0.5664, "step": 4176, "task_loss": 0.7050455212593079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5029684901237488, "epoch": 3.53, "learning_rate": 2.4779615988407198e-05, "loss": 0.5261, "step": 4177, "task_loss": 1.2686779499053955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2848520874977112, "epoch": 3.53, "learning_rate": 2.4773578070281365e-05, "loss": 0.4009, "step": 4178, "task_loss": 0.48773929476737976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7572857737541199, "epoch": 3.53, "learning_rate": 2.476754015215554e-05, "loss": 0.476, "step": 4179, "task_loss": 1.0687901973724365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44954535365104675, "epoch": 3.53, "learning_rate": 2.4761502234029707e-05, "loss": 0.4095, "step": 4180, "task_loss": 0.3427490293979645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4208841323852539, "epoch": 3.53, "learning_rate": 2.4755464315903877e-05, "loss": 0.4729, "step": 4181, "task_loss": 1.0560078620910645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3869362771511078, "epoch": 3.53, "learning_rate": 2.4749426397778048e-05, "loss": 0.4727, "step": 4182, "task_loss": 0.9952542185783386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3435210883617401, "epoch": 3.54, "learning_rate": 2.4743388479652215e-05, "loss": 0.4129, "step": 4183, "task_loss": 0.10386593639850616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3564915657043457, "epoch": 3.54, "learning_rate": 2.473735056152639e-05, "loss": 0.3294, "step": 4184, "task_loss": 0.3820424973964691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35602158308029175, "epoch": 3.54, "learning_rate": 2.4731312643400556e-05, "loss": 0.4197, "step": 4185, "task_loss": 1.1872541904449463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48267582058906555, "epoch": 3.54, "learning_rate": 2.4725274725274727e-05, "loss": 0.3543, "step": 4186, "task_loss": 0.8485729694366455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6385226249694824, "epoch": 3.54, "learning_rate": 2.4719236807148897e-05, "loss": 0.3816, "step": 4187, "task_loss": 0.3622574210166931 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3397260308265686, "epoch": 3.54, "learning_rate": 2.4713198889023064e-05, "loss": 0.5429, "step": 4188, "task_loss": 0.5081762671470642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3384047746658325, "epoch": 3.54, "learning_rate": 2.4707160970897235e-05, "loss": 0.4805, "step": 4189, "task_loss": 1.0034904479980469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5230478048324585, "epoch": 3.54, "learning_rate": 2.4701123052771406e-05, "loss": 0.4359, "step": 4190, "task_loss": 0.6231204271316528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2623027563095093, "epoch": 3.54, "learning_rate": 2.4695085134645576e-05, "loss": 0.406, "step": 4191, "task_loss": 0.22444505989551544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2907571792602539, "epoch": 3.54, "learning_rate": 2.4689047216519747e-05, "loss": 0.4715, "step": 4192, "task_loss": 0.7892584204673767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3273170590400696, "epoch": 3.54, "learning_rate": 2.4683009298393914e-05, "loss": 0.4453, "step": 4193, "task_loss": 0.876335859298706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5169587135314941, "epoch": 3.54, "learning_rate": 2.4676971380268084e-05, "loss": 0.4075, "step": 4194, "task_loss": 0.22665148973464966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4507051110267639, "epoch": 3.55, "learning_rate": 2.4670933462142255e-05, "loss": 0.4635, "step": 4195, "task_loss": 1.045668363571167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6659261584281921, "epoch": 3.55, "learning_rate": 2.4664895544016426e-05, "loss": 0.3897, "step": 4196, "task_loss": 0.7881188988685608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42021965980529785, "epoch": 3.55, "learning_rate": 2.4658857625890593e-05, "loss": 0.4411, "step": 4197, "task_loss": 0.6973052024841309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2337799370288849, "epoch": 3.55, "learning_rate": 2.4652819707764763e-05, "loss": 0.4365, "step": 4198, "task_loss": 1.0139292478561401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5530529022216797, "epoch": 3.55, "learning_rate": 2.4646781789638934e-05, "loss": 0.4483, "step": 4199, "task_loss": 0.7564842700958252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.402576208114624, "epoch": 3.55, "learning_rate": 2.4640743871513104e-05, "loss": 0.4206, "step": 4200, "task_loss": 0.5350391864776611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36014649271965027, "epoch": 3.55, "learning_rate": 2.4634705953387275e-05, "loss": 0.3007, "step": 4201, "task_loss": 0.28406527638435364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6275275945663452, "epoch": 3.55, "learning_rate": 2.4628668035261442e-05, "loss": 0.4731, "step": 4202, "task_loss": 0.47266849875450134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3842344880104065, "epoch": 3.55, "learning_rate": 2.4622630117135613e-05, "loss": 0.4538, "step": 4203, "task_loss": 0.1553061604499817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5058034658432007, "epoch": 3.55, "learning_rate": 2.4616592199009783e-05, "loss": 0.5517, "step": 4204, "task_loss": 0.6412107348442078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31238651275634766, "epoch": 3.55, "learning_rate": 2.461055428088395e-05, "loss": 0.4315, "step": 4205, "task_loss": 0.7764227390289307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5064659118652344, "epoch": 3.56, "learning_rate": 2.4604516362758124e-05, "loss": 0.4227, "step": 4206, "task_loss": 0.6032996773719788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3027956187725067, "epoch": 3.56, "learning_rate": 2.459847844463229e-05, "loss": 0.3812, "step": 4207, "task_loss": 0.3499034345149994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3788808286190033, "epoch": 3.56, "learning_rate": 2.4592440526506462e-05, "loss": 0.3843, "step": 4208, "task_loss": 0.3017846345901489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3057873845100403, "epoch": 3.56, "learning_rate": 2.4586402608380633e-05, "loss": 0.3862, "step": 4209, "task_loss": 0.5248861312866211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18687419593334198, "epoch": 3.56, "learning_rate": 2.45803646902548e-05, "loss": 0.4177, "step": 4210, "task_loss": 0.1880708783864975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4614254832267761, "epoch": 3.56, "learning_rate": 2.457432677212897e-05, "loss": 0.5815, "step": 4211, "task_loss": 0.6216480135917664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6433556079864502, "epoch": 3.56, "learning_rate": 2.456828885400314e-05, "loss": 0.4605, "step": 4212, "task_loss": 0.7380768060684204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29609906673431396, "epoch": 3.56, "learning_rate": 2.456225093587731e-05, "loss": 0.3576, "step": 4213, "task_loss": 0.5521484613418579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2574906349182129, "epoch": 3.56, "learning_rate": 2.4556213017751482e-05, "loss": 0.4631, "step": 4214, "task_loss": 0.8482747077941895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4263721704483032, "epoch": 3.56, "learning_rate": 2.455017509962565e-05, "loss": 0.5769, "step": 4215, "task_loss": 0.5135222673416138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3068527579307556, "epoch": 3.56, "learning_rate": 2.454413718149982e-05, "loss": 0.3343, "step": 4216, "task_loss": 0.21640312671661377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33221495151519775, "epoch": 3.56, "learning_rate": 2.453809926337399e-05, "loss": 0.3153, "step": 4217, "task_loss": 0.5625758767127991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26960498094558716, "epoch": 3.57, "learning_rate": 2.4532061345248158e-05, "loss": 0.4878, "step": 4218, "task_loss": 0.44945892691612244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5922579765319824, "epoch": 3.57, "learning_rate": 2.452602342712233e-05, "loss": 0.4663, "step": 4219, "task_loss": 0.7097660303115845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2538287341594696, "epoch": 3.57, "learning_rate": 2.45199855089965e-05, "loss": 0.4945, "step": 4220, "task_loss": 0.6226836442947388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26428088545799255, "epoch": 3.57, "learning_rate": 2.451394759087067e-05, "loss": 0.387, "step": 4221, "task_loss": 0.11606166511774063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4038158655166626, "epoch": 3.57, "learning_rate": 2.450790967274484e-05, "loss": 0.3627, "step": 4222, "task_loss": 0.9661005735397339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5950110554695129, "epoch": 3.57, "learning_rate": 2.4501871754619007e-05, "loss": 0.3504, "step": 4223, "task_loss": 0.7882821559906006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6520097255706787, "epoch": 3.57, "learning_rate": 2.4495833836493178e-05, "loss": 0.4962, "step": 4224, "task_loss": 1.0100202560424805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5213394165039062, "epoch": 3.57, "learning_rate": 2.448979591836735e-05, "loss": 0.4627, "step": 4225, "task_loss": 1.1861660480499268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43477123975753784, "epoch": 3.57, "learning_rate": 2.448375800024152e-05, "loss": 0.5393, "step": 4226, "task_loss": 0.7448779940605164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35181868076324463, "epoch": 3.57, "learning_rate": 2.4477720082115686e-05, "loss": 0.415, "step": 4227, "task_loss": 0.8360610604286194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42079776525497437, "epoch": 3.57, "learning_rate": 2.4471682163989857e-05, "loss": 0.4293, "step": 4228, "task_loss": 0.2287655621767044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5495008826255798, "epoch": 3.57, "learning_rate": 2.4465644245864027e-05, "loss": 0.5206, "step": 4229, "task_loss": 1.5012588500976562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.932296633720398, "epoch": 3.58, "learning_rate": 2.4459606327738198e-05, "loss": 0.5141, "step": 4230, "task_loss": 1.2828584909439087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3318359851837158, "epoch": 3.58, "learning_rate": 2.445356840961237e-05, "loss": 0.3748, "step": 4231, "task_loss": 0.6501023769378662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3542185425758362, "epoch": 3.58, "learning_rate": 2.4447530491486536e-05, "loss": 0.4486, "step": 4232, "task_loss": 0.6506872177124023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5518895983695984, "epoch": 3.58, "learning_rate": 2.4441492573360706e-05, "loss": 0.4001, "step": 4233, "task_loss": 0.5973522067070007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2684733271598816, "epoch": 3.58, "learning_rate": 2.4435454655234877e-05, "loss": 0.3857, "step": 4234, "task_loss": 0.5242713689804077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9020867943763733, "epoch": 3.58, "learning_rate": 2.4429416737109044e-05, "loss": 0.4952, "step": 4235, "task_loss": 0.6103016138076782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2917327582836151, "epoch": 3.58, "learning_rate": 2.4423378818983218e-05, "loss": 0.3604, "step": 4236, "task_loss": 0.1215207502245903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7477647066116333, "epoch": 3.58, "learning_rate": 2.4417340900857385e-05, "loss": 0.4931, "step": 4237, "task_loss": 1.2171536684036255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31588295102119446, "epoch": 3.58, "learning_rate": 2.4411302982731556e-05, "loss": 0.4191, "step": 4238, "task_loss": 1.2728139162063599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29110074043273926, "epoch": 3.58, "learning_rate": 2.4405265064605726e-05, "loss": 0.3726, "step": 4239, "task_loss": 0.3041709065437317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2606784403324127, "epoch": 3.58, "learning_rate": 2.4399227146479893e-05, "loss": 0.3419, "step": 4240, "task_loss": 1.0654939413070679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43295687437057495, "epoch": 3.58, "learning_rate": 2.4393189228354067e-05, "loss": 0.5271, "step": 4241, "task_loss": 0.4925839304924011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3347214460372925, "epoch": 3.59, "learning_rate": 2.4387151310228235e-05, "loss": 0.5567, "step": 4242, "task_loss": 1.0562480688095093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7010411024093628, "epoch": 3.59, "learning_rate": 2.4381113392102402e-05, "loss": 0.5023, "step": 4243, "task_loss": 0.38347238302230835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2983602285385132, "epoch": 3.59, "learning_rate": 2.4375075473976576e-05, "loss": 0.4398, "step": 4244, "task_loss": 0.8179263472557068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29728031158447266, "epoch": 3.59, "learning_rate": 2.4369037555850743e-05, "loss": 0.4604, "step": 4245, "task_loss": 0.07672573626041412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20008133351802826, "epoch": 3.59, "learning_rate": 2.4362999637724913e-05, "loss": 0.3579, "step": 4246, "task_loss": 0.04613213241100311 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24158811569213867, "epoch": 3.59, "learning_rate": 2.4356961719599084e-05, "loss": 0.3769, "step": 4247, "task_loss": 0.5054489970207214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4486289620399475, "epoch": 3.59, "learning_rate": 2.435092380147325e-05, "loss": 0.4967, "step": 4248, "task_loss": 0.255215585231781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5007073879241943, "epoch": 3.59, "learning_rate": 2.4344885883347425e-05, "loss": 0.4072, "step": 4249, "task_loss": 1.3142558336257935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24978311359882355, "epoch": 3.59, "learning_rate": 2.4338847965221592e-05, "loss": 0.2943, "step": 4250, "task_loss": 0.6421040296554565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6007083058357239, "epoch": 3.59, "learning_rate": 2.4332810047095763e-05, "loss": 0.64, "step": 4251, "task_loss": 0.3903038501739502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40951210260391235, "epoch": 3.59, "learning_rate": 2.4326772128969933e-05, "loss": 0.3726, "step": 4252, "task_loss": 0.6708388924598694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5920516848564148, "epoch": 3.59, "learning_rate": 2.43207342108441e-05, "loss": 0.5529, "step": 4253, "task_loss": 2.2030231952667236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23375967144966125, "epoch": 3.6, "learning_rate": 2.431469629271827e-05, "loss": 0.4587, "step": 4254, "task_loss": 0.9116014838218689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3292333483695984, "epoch": 3.6, "learning_rate": 2.4308658374592442e-05, "loss": 0.4187, "step": 4255, "task_loss": 0.3730236291885376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7763853073120117, "epoch": 3.6, "learning_rate": 2.4302620456466612e-05, "loss": 0.4745, "step": 4256, "task_loss": 1.4318147897720337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5134932994842529, "epoch": 3.6, "learning_rate": 2.4296582538340783e-05, "loss": 0.3882, "step": 4257, "task_loss": 0.7235110998153687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21024847030639648, "epoch": 3.6, "learning_rate": 2.429054462021495e-05, "loss": 0.3471, "step": 4258, "task_loss": 0.1869986355304718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47755247354507446, "epoch": 3.6, "learning_rate": 2.428450670208912e-05, "loss": 0.5661, "step": 4259, "task_loss": 0.697269082069397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36786797642707825, "epoch": 3.6, "learning_rate": 2.427846878396329e-05, "loss": 0.4001, "step": 4260, "task_loss": 0.6654212474822998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34388700127601624, "epoch": 3.6, "learning_rate": 2.4272430865837462e-05, "loss": 0.3555, "step": 4261, "task_loss": 0.8720229268074036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41369879245758057, "epoch": 3.6, "learning_rate": 2.426639294771163e-05, "loss": 0.4741, "step": 4262, "task_loss": 0.5306738615036011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4226112365722656, "epoch": 3.6, "learning_rate": 2.42603550295858e-05, "loss": 0.4488, "step": 4263, "task_loss": 0.7581749558448792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32456839084625244, "epoch": 3.6, "learning_rate": 2.425431711145997e-05, "loss": 0.346, "step": 4264, "task_loss": 0.06784942001104355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4157392680644989, "epoch": 3.6, "learning_rate": 2.424827919333414e-05, "loss": 0.4101, "step": 4265, "task_loss": 0.377225786447525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3458455801010132, "epoch": 3.61, "learning_rate": 2.424224127520831e-05, "loss": 0.3572, "step": 4266, "task_loss": 0.3617345988750458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3823479413986206, "epoch": 3.61, "learning_rate": 2.423620335708248e-05, "loss": 0.4181, "step": 4267, "task_loss": 0.729090690612793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4985722303390503, "epoch": 3.61, "learning_rate": 2.423016543895665e-05, "loss": 0.4605, "step": 4268, "task_loss": 1.253476619720459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43405985832214355, "epoch": 3.61, "learning_rate": 2.422412752083082e-05, "loss": 0.5655, "step": 4269, "task_loss": 0.43991178274154663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3254452049732208, "epoch": 3.61, "learning_rate": 2.4218089602704987e-05, "loss": 0.3405, "step": 4270, "task_loss": 0.08201682567596436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5044230818748474, "epoch": 3.61, "learning_rate": 2.421205168457916e-05, "loss": 0.4921, "step": 4271, "task_loss": 1.6212390661239624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2868116497993469, "epoch": 3.61, "learning_rate": 2.4206013766453328e-05, "loss": 0.4965, "step": 4272, "task_loss": 1.9864070415496826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31388136744499207, "epoch": 3.61, "learning_rate": 2.41999758483275e-05, "loss": 0.4907, "step": 4273, "task_loss": 0.795539140701294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21662594377994537, "epoch": 3.61, "learning_rate": 2.419393793020167e-05, "loss": 0.383, "step": 4274, "task_loss": 0.8805877566337585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5334919691085815, "epoch": 3.61, "learning_rate": 2.4187900012075836e-05, "loss": 0.4727, "step": 4275, "task_loss": 0.7506752014160156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4645922780036926, "epoch": 3.61, "learning_rate": 2.4181862093950007e-05, "loss": 0.4859, "step": 4276, "task_loss": 0.6232338547706604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4139849543571472, "epoch": 3.61, "learning_rate": 2.4175824175824177e-05, "loss": 0.4013, "step": 4277, "task_loss": 0.42858392000198364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26512736082077026, "epoch": 3.62, "learning_rate": 2.4169786257698345e-05, "loss": 0.3381, "step": 4278, "task_loss": 0.1346105933189392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35148850083351135, "epoch": 3.62, "learning_rate": 2.416374833957252e-05, "loss": 0.3674, "step": 4279, "task_loss": 0.5607234239578247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5917513370513916, "epoch": 3.62, "learning_rate": 2.4157710421446686e-05, "loss": 0.5401, "step": 4280, "task_loss": 0.42761117219924927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5946917533874512, "epoch": 3.62, "learning_rate": 2.4151672503320856e-05, "loss": 0.3993, "step": 4281, "task_loss": 0.8605582118034363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40059760212898254, "epoch": 3.62, "learning_rate": 2.4145634585195027e-05, "loss": 0.4502, "step": 4282, "task_loss": 0.6441447138786316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1449149250984192, "epoch": 3.62, "learning_rate": 2.4139596667069194e-05, "loss": 0.3204, "step": 4283, "task_loss": 0.2731958329677582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7207844257354736, "epoch": 3.62, "learning_rate": 2.4133558748943365e-05, "loss": 0.4441, "step": 4284, "task_loss": 0.8705777525901794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20856665074825287, "epoch": 3.62, "learning_rate": 2.4127520830817535e-05, "loss": 0.4273, "step": 4285, "task_loss": 0.2781510353088379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5062478184700012, "epoch": 3.62, "learning_rate": 2.4121482912691706e-05, "loss": 0.358, "step": 4286, "task_loss": 0.38318130373954773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33684784173965454, "epoch": 3.62, "learning_rate": 2.4115444994565876e-05, "loss": 0.4025, "step": 4287, "task_loss": 0.24502919614315033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2792580723762512, "epoch": 3.62, "learning_rate": 2.4109407076440044e-05, "loss": 0.2999, "step": 4288, "task_loss": 0.55486661195755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3379784822463989, "epoch": 3.63, "learning_rate": 2.4103369158314214e-05, "loss": 0.4801, "step": 4289, "task_loss": 0.7575530409812927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5320757627487183, "epoch": 3.63, "learning_rate": 2.4097331240188385e-05, "loss": 0.4377, "step": 4290, "task_loss": 1.0669926404953003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29260796308517456, "epoch": 3.63, "learning_rate": 2.4091293322062555e-05, "loss": 0.4638, "step": 4291, "task_loss": 0.24648793041706085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.568744421005249, "epoch": 3.63, "learning_rate": 2.4085255403936722e-05, "loss": 0.4186, "step": 4292, "task_loss": 0.6872087717056274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38329315185546875, "epoch": 3.63, "learning_rate": 2.4079217485810893e-05, "loss": 0.4597, "step": 4293, "task_loss": 0.8579928874969482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5953642725944519, "epoch": 3.63, "learning_rate": 2.4073179567685064e-05, "loss": 0.5061, "step": 4294, "task_loss": 0.9143393039703369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2652750015258789, "epoch": 3.63, "learning_rate": 2.4067141649559234e-05, "loss": 0.3625, "step": 4295, "task_loss": 0.6068036556243896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.382709264755249, "epoch": 3.63, "learning_rate": 2.4061103731433405e-05, "loss": 0.3198, "step": 4296, "task_loss": 0.564424455165863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21933870017528534, "epoch": 3.63, "learning_rate": 2.4055065813307572e-05, "loss": 0.3311, "step": 4297, "task_loss": 0.4811224043369293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5110838413238525, "epoch": 3.63, "learning_rate": 2.4049027895181742e-05, "loss": 0.5668, "step": 4298, "task_loss": 1.233802318572998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2466367483139038, "epoch": 3.63, "learning_rate": 2.4042989977055913e-05, "loss": 0.3103, "step": 4299, "task_loss": 0.5571226477622986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30963999032974243, "epoch": 3.63, "learning_rate": 2.403695205893008e-05, "loss": 0.4769, "step": 4300, "task_loss": 1.0801305770874023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5656776428222656, "epoch": 3.64, "learning_rate": 2.4030914140804254e-05, "loss": 0.5064, "step": 4301, "task_loss": 0.6347982883453369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32780390977859497, "epoch": 3.64, "learning_rate": 2.402487622267842e-05, "loss": 0.4725, "step": 4302, "task_loss": 0.7421913743019104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44117721915245056, "epoch": 3.64, "learning_rate": 2.4018838304552592e-05, "loss": 0.4408, "step": 4303, "task_loss": 0.9643782377243042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6825166344642639, "epoch": 3.64, "learning_rate": 2.4012800386426763e-05, "loss": 0.547, "step": 4304, "task_loss": 0.6610129475593567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4128228724002838, "epoch": 3.64, "learning_rate": 2.400676246830093e-05, "loss": 0.5141, "step": 4305, "task_loss": 1.2728201150894165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30184563994407654, "epoch": 3.64, "learning_rate": 2.4000724550175104e-05, "loss": 0.4079, "step": 4306, "task_loss": 0.290946364402771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43785184621810913, "epoch": 3.64, "learning_rate": 2.399468663204927e-05, "loss": 0.4377, "step": 4307, "task_loss": 0.4729573428630829 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20710718631744385, "epoch": 3.64, "learning_rate": 2.3988648713923438e-05, "loss": 0.3775, "step": 4308, "task_loss": 0.2356695830821991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28598278760910034, "epoch": 3.64, "learning_rate": 2.3982610795797612e-05, "loss": 0.4566, "step": 4309, "task_loss": 1.6548908948898315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7102431654930115, "epoch": 3.64, "learning_rate": 2.397657287767178e-05, "loss": 0.5767, "step": 4310, "task_loss": 1.189503788948059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7995768189430237, "epoch": 3.64, "learning_rate": 2.397053495954595e-05, "loss": 0.4772, "step": 4311, "task_loss": 0.644882082939148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42733514308929443, "epoch": 3.64, "learning_rate": 2.396449704142012e-05, "loss": 0.3372, "step": 4312, "task_loss": 0.6000651717185974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3076114058494568, "epoch": 3.65, "learning_rate": 2.3958459123294287e-05, "loss": 0.4481, "step": 4313, "task_loss": 0.3163661062717438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29542970657348633, "epoch": 3.65, "learning_rate": 2.395242120516846e-05, "loss": 0.4422, "step": 4314, "task_loss": 0.6018317341804504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37238702178001404, "epoch": 3.65, "learning_rate": 2.394638328704263e-05, "loss": 0.3786, "step": 4315, "task_loss": 0.8703362345695496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21858958899974823, "epoch": 3.65, "learning_rate": 2.39403453689168e-05, "loss": 0.4006, "step": 4316, "task_loss": 0.46465393900871277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3217204511165619, "epoch": 3.65, "learning_rate": 2.393430745079097e-05, "loss": 0.3566, "step": 4317, "task_loss": 1.2731482982635498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5119150876998901, "epoch": 3.65, "learning_rate": 2.3928269532665137e-05, "loss": 0.4397, "step": 4318, "task_loss": 0.8062406182289124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22020503878593445, "epoch": 3.65, "learning_rate": 2.3922231614539308e-05, "loss": 0.3946, "step": 4319, "task_loss": 0.2701268196105957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42803123593330383, "epoch": 3.65, "learning_rate": 2.3916193696413478e-05, "loss": 0.4025, "step": 4320, "task_loss": 1.002197027206421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2631204426288605, "epoch": 3.65, "learning_rate": 2.3910155778287645e-05, "loss": 0.4617, "step": 4321, "task_loss": 0.8614470362663269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23493555188179016, "epoch": 3.65, "learning_rate": 2.390411786016182e-05, "loss": 0.4022, "step": 4322, "task_loss": 0.18521754443645477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5668258666992188, "epoch": 3.65, "learning_rate": 2.3898079942035986e-05, "loss": 0.4515, "step": 4323, "task_loss": 1.2004319429397583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34108731150627136, "epoch": 3.65, "learning_rate": 2.3892042023910157e-05, "loss": 0.4079, "step": 4324, "task_loss": 0.5488756895065308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2760353684425354, "epoch": 3.66, "learning_rate": 2.3886004105784328e-05, "loss": 0.3942, "step": 4325, "task_loss": 0.07021418958902359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3451019525527954, "epoch": 3.66, "learning_rate": 2.3879966187658495e-05, "loss": 0.4069, "step": 4326, "task_loss": 1.2411590814590454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26589030027389526, "epoch": 3.66, "learning_rate": 2.3873928269532665e-05, "loss": 0.3399, "step": 4327, "task_loss": 0.40344950556755066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2504081130027771, "epoch": 3.66, "learning_rate": 2.3867890351406836e-05, "loss": 0.384, "step": 4328, "task_loss": 0.21057577431201935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34401214122772217, "epoch": 3.66, "learning_rate": 2.3861852433281006e-05, "loss": 0.3269, "step": 4329, "task_loss": 0.27278146147727966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5485828518867493, "epoch": 3.66, "learning_rate": 2.3855814515155177e-05, "loss": 0.5269, "step": 4330, "task_loss": 0.6512842178344727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46600857377052307, "epoch": 3.66, "learning_rate": 2.3849776597029344e-05, "loss": 0.4406, "step": 4331, "task_loss": 0.41783779859542847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3027346730232239, "epoch": 3.66, "learning_rate": 2.3843738678903515e-05, "loss": 0.314, "step": 4332, "task_loss": 0.724738359451294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6416730284690857, "epoch": 3.66, "learning_rate": 2.3837700760777685e-05, "loss": 0.4544, "step": 4333, "task_loss": 0.4980444610118866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4054110050201416, "epoch": 3.66, "learning_rate": 2.3831662842651856e-05, "loss": 0.3016, "step": 4334, "task_loss": 0.7494133114814758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3713572025299072, "epoch": 3.66, "learning_rate": 2.3825624924526023e-05, "loss": 0.4754, "step": 4335, "task_loss": 0.6533140540122986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2257539927959442, "epoch": 3.66, "learning_rate": 2.3819587006400194e-05, "loss": 0.3541, "step": 4336, "task_loss": 0.08502235263586044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2338886260986328, "epoch": 3.67, "learning_rate": 2.3813549088274364e-05, "loss": 0.4258, "step": 4337, "task_loss": 0.5852758884429932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6030504703521729, "epoch": 3.67, "learning_rate": 2.3807511170148535e-05, "loss": 0.5272, "step": 4338, "task_loss": 1.1149274110794067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41849595308303833, "epoch": 3.67, "learning_rate": 2.3801473252022705e-05, "loss": 0.3292, "step": 4339, "task_loss": 0.06931182742118835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5193805694580078, "epoch": 3.67, "learning_rate": 2.3795435333896873e-05, "loss": 0.4798, "step": 4340, "task_loss": 0.29875683784484863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31274259090423584, "epoch": 3.67, "learning_rate": 2.3789397415771043e-05, "loss": 0.5297, "step": 4341, "task_loss": 0.21262910962104797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31475281715393066, "epoch": 3.67, "learning_rate": 2.3783359497645214e-05, "loss": 0.5675, "step": 4342, "task_loss": 0.19850346446037292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44890302419662476, "epoch": 3.67, "learning_rate": 2.377732157951938e-05, "loss": 0.4724, "step": 4343, "task_loss": 0.3347076177597046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3019377291202545, "epoch": 3.67, "learning_rate": 2.3771283661393555e-05, "loss": 0.3943, "step": 4344, "task_loss": 1.1505064964294434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3252951502799988, "epoch": 3.67, "learning_rate": 2.3765245743267722e-05, "loss": 0.3871, "step": 4345, "task_loss": 0.18846963346004486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24716635048389435, "epoch": 3.67, "learning_rate": 2.375920782514189e-05, "loss": 0.3755, "step": 4346, "task_loss": 0.4641077518463135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38405072689056396, "epoch": 3.67, "learning_rate": 2.3753169907016063e-05, "loss": 0.3856, "step": 4347, "task_loss": 0.4858514666557312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7171549201011658, "epoch": 3.67, "learning_rate": 2.374713198889023e-05, "loss": 0.6162, "step": 4348, "task_loss": 0.24083614349365234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8512578010559082, "epoch": 3.68, "learning_rate": 2.37410940707644e-05, "loss": 0.6455, "step": 4349, "task_loss": 1.1528807878494263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2988833785057068, "epoch": 3.68, "learning_rate": 2.373505615263857e-05, "loss": 0.3243, "step": 4350, "task_loss": 0.5365366339683533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6797757148742676, "epoch": 3.68, "learning_rate": 2.372901823451274e-05, "loss": 0.4957, "step": 4351, "task_loss": 0.4292236268520355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4648118019104004, "epoch": 3.68, "learning_rate": 2.3722980316386913e-05, "loss": 0.5921, "step": 4352, "task_loss": 0.5039355158805847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6312812566757202, "epoch": 3.68, "learning_rate": 2.371694239826108e-05, "loss": 0.5508, "step": 4353, "task_loss": 0.8707351088523865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8250569105148315, "epoch": 3.68, "learning_rate": 2.371090448013525e-05, "loss": 0.5754, "step": 4354, "task_loss": 1.1599047183990479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3849352300167084, "epoch": 3.68, "learning_rate": 2.370486656200942e-05, "loss": 0.4831, "step": 4355, "task_loss": 0.8635945916175842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3376413881778717, "epoch": 3.68, "learning_rate": 2.3698828643883588e-05, "loss": 0.4776, "step": 4356, "task_loss": 1.2000855207443237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40392011404037476, "epoch": 3.68, "learning_rate": 2.369279072575776e-05, "loss": 0.4506, "step": 4357, "task_loss": 0.6116074323654175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4804266691207886, "epoch": 3.68, "learning_rate": 2.368675280763193e-05, "loss": 0.3634, "step": 4358, "task_loss": 0.432324081659317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.322414755821228, "epoch": 3.68, "learning_rate": 2.36807148895061e-05, "loss": 0.4746, "step": 4359, "task_loss": 0.7200769186019897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3177717924118042, "epoch": 3.69, "learning_rate": 2.367467697138027e-05, "loss": 0.336, "step": 4360, "task_loss": 0.12427821755409241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2184416949748993, "epoch": 3.69, "learning_rate": 2.3668639053254438e-05, "loss": 0.4457, "step": 4361, "task_loss": 0.5366232395172119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33410438895225525, "epoch": 3.69, "learning_rate": 2.3662601135128608e-05, "loss": 0.414, "step": 4362, "task_loss": 1.0630055665969849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4663856029510498, "epoch": 3.69, "learning_rate": 2.365656321700278e-05, "loss": 0.4426, "step": 4363, "task_loss": 0.7049210071563721 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6157823801040649, "epoch": 3.69, "learning_rate": 2.365052529887695e-05, "loss": 0.4255, "step": 4364, "task_loss": 0.32992950081825256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7448548078536987, "epoch": 3.69, "learning_rate": 2.3644487380751117e-05, "loss": 0.5023, "step": 4365, "task_loss": 0.898432731628418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2534962296485901, "epoch": 3.69, "learning_rate": 2.3638449462625287e-05, "loss": 0.5217, "step": 4366, "task_loss": 0.8499137163162231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3841888904571533, "epoch": 3.69, "learning_rate": 2.3632411544499458e-05, "loss": 0.3775, "step": 4367, "task_loss": 0.5330405235290527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3525848984718323, "epoch": 3.69, "learning_rate": 2.3626373626373628e-05, "loss": 0.3834, "step": 4368, "task_loss": 0.6272526383399963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4043210744857788, "epoch": 3.69, "learning_rate": 2.36203357082478e-05, "loss": 0.3918, "step": 4369, "task_loss": 0.7615174055099487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4039674401283264, "epoch": 3.69, "learning_rate": 2.3614297790121966e-05, "loss": 0.4816, "step": 4370, "task_loss": 0.7429620027542114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4781983494758606, "epoch": 3.69, "learning_rate": 2.3608259871996137e-05, "loss": 0.4585, "step": 4371, "task_loss": 0.09657207876443863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3848237991333008, "epoch": 3.7, "learning_rate": 2.3602221953870307e-05, "loss": 0.338, "step": 4372, "task_loss": 0.8775408267974854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6649436950683594, "epoch": 3.7, "learning_rate": 2.3596184035744474e-05, "loss": 0.4736, "step": 4373, "task_loss": 0.6305364966392517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4046058654785156, "epoch": 3.7, "learning_rate": 2.3590146117618648e-05, "loss": 0.3563, "step": 4374, "task_loss": 0.20991548895835876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7319790124893188, "epoch": 3.7, "learning_rate": 2.3584108199492815e-05, "loss": 0.5421, "step": 4375, "task_loss": 2.056255340576172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35291677713394165, "epoch": 3.7, "learning_rate": 2.3578070281366986e-05, "loss": 0.5076, "step": 4376, "task_loss": 0.546640932559967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3800932466983795, "epoch": 3.7, "learning_rate": 2.3572032363241157e-05, "loss": 0.432, "step": 4377, "task_loss": 0.972039520740509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3348750174045563, "epoch": 3.7, "learning_rate": 2.3565994445115324e-05, "loss": 0.5205, "step": 4378, "task_loss": 0.8594505190849304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4044131338596344, "epoch": 3.7, "learning_rate": 2.3559956526989498e-05, "loss": 0.4654, "step": 4379, "task_loss": 0.2622767984867096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2874598801136017, "epoch": 3.7, "learning_rate": 2.3553918608863665e-05, "loss": 0.4795, "step": 4380, "task_loss": 0.49358901381492615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22948315739631653, "epoch": 3.7, "learning_rate": 2.3547880690737832e-05, "loss": 0.3818, "step": 4381, "task_loss": 0.09839097410440445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24589094519615173, "epoch": 3.7, "learning_rate": 2.3541842772612006e-05, "loss": 0.4616, "step": 4382, "task_loss": 0.1330094337463379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7037230730056763, "epoch": 3.7, "learning_rate": 2.3535804854486173e-05, "loss": 0.6756, "step": 4383, "task_loss": 0.18904176354408264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5649870038032532, "epoch": 3.71, "learning_rate": 2.3529766936360344e-05, "loss": 0.4698, "step": 4384, "task_loss": 0.30183231830596924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6310197114944458, "epoch": 3.71, "learning_rate": 2.3523729018234514e-05, "loss": 0.5218, "step": 4385, "task_loss": 1.086739420890808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5749821662902832, "epoch": 3.71, "learning_rate": 2.351769110010868e-05, "loss": 0.4979, "step": 4386, "task_loss": 0.21513479948043823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.296673059463501, "epoch": 3.71, "learning_rate": 2.3511653181982856e-05, "loss": 0.4078, "step": 4387, "task_loss": 0.8409971594810486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25388866662979126, "epoch": 3.71, "learning_rate": 2.3505615263857023e-05, "loss": 0.3422, "step": 4388, "task_loss": 0.023648925125598907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8538635969161987, "epoch": 3.71, "learning_rate": 2.3499577345731193e-05, "loss": 0.6036, "step": 4389, "task_loss": 1.3949329853057861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21222198009490967, "epoch": 3.71, "learning_rate": 2.3493539427605364e-05, "loss": 0.3104, "step": 4390, "task_loss": 0.18908753991127014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32897788286209106, "epoch": 3.71, "learning_rate": 2.348750150947953e-05, "loss": 0.4219, "step": 4391, "task_loss": 0.950526773929596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5008683204650879, "epoch": 3.71, "learning_rate": 2.34814635913537e-05, "loss": 0.5351, "step": 4392, "task_loss": 1.1296972036361694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3020428717136383, "epoch": 3.71, "learning_rate": 2.3475425673227872e-05, "loss": 0.5418, "step": 4393, "task_loss": 0.1410648673772812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34290197491645813, "epoch": 3.71, "learning_rate": 2.3469387755102043e-05, "loss": 0.4408, "step": 4394, "task_loss": 1.2387659549713135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2808294892311096, "epoch": 3.71, "learning_rate": 2.3463349836976213e-05, "loss": 0.453, "step": 4395, "task_loss": 0.4617380201816559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17627505958080292, "epoch": 3.72, "learning_rate": 2.345731191885038e-05, "loss": 0.3397, "step": 4396, "task_loss": 0.4353334605693817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48134446144104004, "epoch": 3.72, "learning_rate": 2.345127400072455e-05, "loss": 0.5411, "step": 4397, "task_loss": 0.882348895072937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5245662927627563, "epoch": 3.72, "learning_rate": 2.344523608259872e-05, "loss": 0.4261, "step": 4398, "task_loss": 0.9311525225639343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6545689702033997, "epoch": 3.72, "learning_rate": 2.3439198164472892e-05, "loss": 0.4716, "step": 4399, "task_loss": 1.2268671989440918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33713221549987793, "epoch": 3.72, "learning_rate": 2.343316024634706e-05, "loss": 0.3715, "step": 4400, "task_loss": 0.30980628728866577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5880165100097656, "epoch": 3.72, "learning_rate": 2.342712232822123e-05, "loss": 0.6082, "step": 4401, "task_loss": 1.6809124946594238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22817161679267883, "epoch": 3.72, "learning_rate": 2.34210844100954e-05, "loss": 0.4557, "step": 4402, "task_loss": 0.5300211310386658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43205124139785767, "epoch": 3.72, "learning_rate": 2.341504649196957e-05, "loss": 0.3575, "step": 4403, "task_loss": 0.34114280343055725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28139594197273254, "epoch": 3.72, "learning_rate": 2.3409008573843742e-05, "loss": 0.4589, "step": 4404, "task_loss": 0.24718612432479858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2940003573894501, "epoch": 3.72, "learning_rate": 2.340297065571791e-05, "loss": 0.3696, "step": 4405, "task_loss": 0.054060980677604675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5303086042404175, "epoch": 3.72, "learning_rate": 2.339693273759208e-05, "loss": 0.5064, "step": 4406, "task_loss": 1.0008702278137207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21501761674880981, "epoch": 3.72, "learning_rate": 2.339089481946625e-05, "loss": 0.3902, "step": 4407, "task_loss": 0.21138431131839752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40803712606430054, "epoch": 3.73, "learning_rate": 2.3384856901340417e-05, "loss": 0.53, "step": 4408, "task_loss": 0.47623884677886963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3681905269622803, "epoch": 3.73, "learning_rate": 2.337881898321459e-05, "loss": 0.38, "step": 4409, "task_loss": 0.7757147550582886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.284656822681427, "epoch": 3.73, "learning_rate": 2.337278106508876e-05, "loss": 0.5215, "step": 4410, "task_loss": 0.6524332761764526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48360955715179443, "epoch": 3.73, "learning_rate": 2.3366743146962926e-05, "loss": 0.5495, "step": 4411, "task_loss": 0.738800585269928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7366483807563782, "epoch": 3.73, "learning_rate": 2.33607052288371e-05, "loss": 0.4966, "step": 4412, "task_loss": 0.8068609833717346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3286115825176239, "epoch": 3.73, "learning_rate": 2.3354667310711267e-05, "loss": 0.419, "step": 4413, "task_loss": 1.1429532766342163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5136556625366211, "epoch": 3.73, "learning_rate": 2.3348629392585437e-05, "loss": 0.4844, "step": 4414, "task_loss": 1.039982795715332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4909369647502899, "epoch": 3.73, "learning_rate": 2.3342591474459608e-05, "loss": 0.419, "step": 4415, "task_loss": 0.4345233142375946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7650905251502991, "epoch": 3.73, "learning_rate": 2.3336553556333775e-05, "loss": 0.4033, "step": 4416, "task_loss": 0.5189441442489624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.449804425239563, "epoch": 3.73, "learning_rate": 2.333051563820795e-05, "loss": 0.4459, "step": 4417, "task_loss": 1.0539319515228271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16194483637809753, "epoch": 3.73, "learning_rate": 2.3324477720082116e-05, "loss": 0.3867, "step": 4418, "task_loss": 0.06577624380588531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4687257707118988, "epoch": 3.73, "learning_rate": 2.3318439801956287e-05, "loss": 0.4427, "step": 4419, "task_loss": 0.4052376449108124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45865878462791443, "epoch": 3.74, "learning_rate": 2.3312401883830457e-05, "loss": 0.3787, "step": 4420, "task_loss": 0.5088247060775757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5455809235572815, "epoch": 3.74, "learning_rate": 2.3306363965704624e-05, "loss": 0.4451, "step": 4421, "task_loss": 0.8769527077674866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29663652181625366, "epoch": 3.74, "learning_rate": 2.3300326047578795e-05, "loss": 0.461, "step": 4422, "task_loss": 1.0240150690078735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3607789874076843, "epoch": 3.74, "learning_rate": 2.3294288129452966e-05, "loss": 0.4149, "step": 4423, "task_loss": 1.2446478605270386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35371482372283936, "epoch": 3.74, "learning_rate": 2.3288250211327136e-05, "loss": 0.3827, "step": 4424, "task_loss": 0.7088547348976135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4361785650253296, "epoch": 3.74, "learning_rate": 2.3282212293201307e-05, "loss": 0.3707, "step": 4425, "task_loss": 1.6834126710891724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2863893210887909, "epoch": 3.74, "learning_rate": 2.3276174375075474e-05, "loss": 0.3967, "step": 4426, "task_loss": 0.22285453975200653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22680675983428955, "epoch": 3.74, "learning_rate": 2.3270136456949645e-05, "loss": 0.2994, "step": 4427, "task_loss": 0.4024110436439514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23730021715164185, "epoch": 3.74, "learning_rate": 2.3264098538823815e-05, "loss": 0.5148, "step": 4428, "task_loss": 0.5319615006446838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37918394804000854, "epoch": 3.74, "learning_rate": 2.3258060620697986e-05, "loss": 0.4095, "step": 4429, "task_loss": 0.22076989710330963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6181727051734924, "epoch": 3.74, "learning_rate": 2.3252022702572153e-05, "loss": 0.6756, "step": 4430, "task_loss": 1.352083683013916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6500645875930786, "epoch": 3.75, "learning_rate": 2.3245984784446323e-05, "loss": 0.4724, "step": 4431, "task_loss": 1.3113908767700195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5382220149040222, "epoch": 3.75, "learning_rate": 2.3239946866320494e-05, "loss": 0.4169, "step": 4432, "task_loss": 0.8610221743583679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2987130880355835, "epoch": 3.75, "learning_rate": 2.3233908948194665e-05, "loss": 0.4475, "step": 4433, "task_loss": 0.3139626383781433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4523991346359253, "epoch": 3.75, "learning_rate": 2.3227871030068835e-05, "loss": 0.4208, "step": 4434, "task_loss": 0.4997214376926422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26114004850387573, "epoch": 3.75, "learning_rate": 2.3221833111943002e-05, "loss": 0.3383, "step": 4435, "task_loss": 0.3404037654399872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4599674642086029, "epoch": 3.75, "learning_rate": 2.3215795193817173e-05, "loss": 0.4895, "step": 4436, "task_loss": 0.7792286276817322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2944963872432709, "epoch": 3.75, "learning_rate": 2.3209757275691343e-05, "loss": 0.3925, "step": 4437, "task_loss": 0.5089151263237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43262970447540283, "epoch": 3.75, "learning_rate": 2.320371935756551e-05, "loss": 0.3531, "step": 4438, "task_loss": 0.391244113445282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16138878464698792, "epoch": 3.75, "learning_rate": 2.3197681439439685e-05, "loss": 0.3675, "step": 4439, "task_loss": 0.24685081839561462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5383412837982178, "epoch": 3.75, "learning_rate": 2.3191643521313852e-05, "loss": 0.5149, "step": 4440, "task_loss": 1.5086150169372559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40144553780555725, "epoch": 3.75, "learning_rate": 2.3185605603188022e-05, "loss": 0.4574, "step": 4441, "task_loss": 0.6165393590927124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23161765933036804, "epoch": 3.75, "learning_rate": 2.3179567685062193e-05, "loss": 0.3284, "step": 4442, "task_loss": 0.24813394248485565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5264412760734558, "epoch": 3.76, "learning_rate": 2.317352976693636e-05, "loss": 0.4569, "step": 4443, "task_loss": 0.43333080410957336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3450121283531189, "epoch": 3.76, "learning_rate": 2.3167491848810534e-05, "loss": 0.4161, "step": 4444, "task_loss": 1.0490206480026245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4806835353374481, "epoch": 3.76, "learning_rate": 2.31614539306847e-05, "loss": 0.3641, "step": 4445, "task_loss": 0.9029076099395752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2493019998073578, "epoch": 3.76, "learning_rate": 2.315541601255887e-05, "loss": 0.329, "step": 4446, "task_loss": 0.11698347330093384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.68231201171875, "epoch": 3.76, "learning_rate": 2.3149378094433042e-05, "loss": 0.5143, "step": 4447, "task_loss": 0.4615102708339691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3655972480773926, "epoch": 3.76, "learning_rate": 2.314334017630721e-05, "loss": 0.4627, "step": 4448, "task_loss": 1.3994730710983276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24759599566459656, "epoch": 3.76, "learning_rate": 2.313730225818138e-05, "loss": 0.3897, "step": 4449, "task_loss": 0.01047065295279026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3929322361946106, "epoch": 3.76, "learning_rate": 2.313126434005555e-05, "loss": 0.478, "step": 4450, "task_loss": 1.3744184970855713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3192671239376068, "epoch": 3.76, "learning_rate": 2.3125226421929718e-05, "loss": 0.3919, "step": 4451, "task_loss": 0.7317619323730469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9407707452774048, "epoch": 3.76, "learning_rate": 2.3119188503803892e-05, "loss": 0.4891, "step": 4452, "task_loss": 0.4927315413951874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5318622589111328, "epoch": 3.76, "learning_rate": 2.311315058567806e-05, "loss": 0.4242, "step": 4453, "task_loss": 0.6155281066894531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45417162775993347, "epoch": 3.76, "learning_rate": 2.310711266755223e-05, "loss": 0.353, "step": 4454, "task_loss": 0.7112934589385986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.483846515417099, "epoch": 3.77, "learning_rate": 2.31010747494264e-05, "loss": 0.4555, "step": 4455, "task_loss": 0.0725066140294075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30103036761283875, "epoch": 3.77, "learning_rate": 2.3095036831300567e-05, "loss": 0.4934, "step": 4456, "task_loss": 0.7189642786979675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4720456004142761, "epoch": 3.77, "learning_rate": 2.3088998913174738e-05, "loss": 0.3942, "step": 4457, "task_loss": 0.712512731552124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3120814561843872, "epoch": 3.77, "learning_rate": 2.308296099504891e-05, "loss": 0.4404, "step": 4458, "task_loss": 0.5054025650024414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28111112117767334, "epoch": 3.77, "learning_rate": 2.307692307692308e-05, "loss": 0.4339, "step": 4459, "task_loss": 0.3224104642868042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3193526864051819, "epoch": 3.77, "learning_rate": 2.307088515879725e-05, "loss": 0.3326, "step": 4460, "task_loss": 0.5401793718338013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48293012380599976, "epoch": 3.77, "learning_rate": 2.3064847240671417e-05, "loss": 0.4702, "step": 4461, "task_loss": 0.21117843687534332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22591020166873932, "epoch": 3.77, "learning_rate": 2.3058809322545587e-05, "loss": 0.304, "step": 4462, "task_loss": 0.4067303538322449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3865288496017456, "epoch": 3.77, "learning_rate": 2.3052771404419758e-05, "loss": 0.4457, "step": 4463, "task_loss": 0.3074984550476074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6222817301750183, "epoch": 3.77, "learning_rate": 2.304673348629393e-05, "loss": 0.5741, "step": 4464, "task_loss": 1.152406096458435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4231198728084564, "epoch": 3.77, "learning_rate": 2.3040695568168096e-05, "loss": 0.4973, "step": 4465, "task_loss": 0.6776015162467957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34064993262290955, "epoch": 3.77, "learning_rate": 2.3034657650042266e-05, "loss": 0.3194, "step": 4466, "task_loss": 0.22909633815288544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3451451063156128, "epoch": 3.78, "learning_rate": 2.3028619731916437e-05, "loss": 0.4556, "step": 4467, "task_loss": 0.4165949523448944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4407603144645691, "epoch": 3.78, "learning_rate": 2.3022581813790607e-05, "loss": 0.4236, "step": 4468, "task_loss": 0.33200201392173767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8760900497436523, "epoch": 3.78, "learning_rate": 2.3016543895664778e-05, "loss": 0.553, "step": 4469, "task_loss": 2.183948278427124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22390298545360565, "epoch": 3.78, "learning_rate": 2.3010505977538945e-05, "loss": 0.3565, "step": 4470, "task_loss": 0.030613157898187637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27986350655555725, "epoch": 3.78, "learning_rate": 2.3004468059413116e-05, "loss": 0.375, "step": 4471, "task_loss": 0.3963104486465454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5986742377281189, "epoch": 3.78, "learning_rate": 2.2998430141287286e-05, "loss": 0.4964, "step": 4472, "task_loss": 0.5059744119644165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.257232666015625, "epoch": 3.78, "learning_rate": 2.2992392223161454e-05, "loss": 0.4005, "step": 4473, "task_loss": 0.5281111001968384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48660510778427124, "epoch": 3.78, "learning_rate": 2.2986354305035627e-05, "loss": 0.5017, "step": 4474, "task_loss": 0.5612062215805054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3416661024093628, "epoch": 3.78, "learning_rate": 2.2980316386909795e-05, "loss": 0.3634, "step": 4475, "task_loss": 0.06469372659921646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2328285425901413, "epoch": 3.78, "learning_rate": 2.2974278468783962e-05, "loss": 0.3906, "step": 4476, "task_loss": 0.7994225025177002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4002777338027954, "epoch": 3.78, "learning_rate": 2.2968240550658136e-05, "loss": 0.4078, "step": 4477, "task_loss": 0.573314905166626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4921528697013855, "epoch": 3.78, "learning_rate": 2.2962202632532303e-05, "loss": 0.4298, "step": 4478, "task_loss": 0.7573316693305969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42830392718315125, "epoch": 3.79, "learning_rate": 2.2956164714406474e-05, "loss": 0.3788, "step": 4479, "task_loss": 0.3268314599990845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32846397161483765, "epoch": 3.79, "learning_rate": 2.2950126796280644e-05, "loss": 0.4171, "step": 4480, "task_loss": 0.03959184139966965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39068934321403503, "epoch": 3.79, "learning_rate": 2.294408887815481e-05, "loss": 0.4025, "step": 4481, "task_loss": 0.8729940056800842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32120150327682495, "epoch": 3.79, "learning_rate": 2.2938050960028985e-05, "loss": 0.441, "step": 4482, "task_loss": 0.770666241645813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4030989706516266, "epoch": 3.79, "learning_rate": 2.2932013041903152e-05, "loss": 0.3906, "step": 4483, "task_loss": 0.2866523265838623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3193625211715698, "epoch": 3.79, "learning_rate": 2.2925975123777323e-05, "loss": 0.3124, "step": 4484, "task_loss": 0.8997069001197815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4227243661880493, "epoch": 3.79, "learning_rate": 2.2919937205651494e-05, "loss": 0.4547, "step": 4485, "task_loss": 0.6816730499267578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6299500465393066, "epoch": 3.79, "learning_rate": 2.291389928752566e-05, "loss": 0.3968, "step": 4486, "task_loss": 1.3028727769851685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5952462553977966, "epoch": 3.79, "learning_rate": 2.290786136939983e-05, "loss": 0.428, "step": 4487, "task_loss": 1.4116227626800537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4255535304546356, "epoch": 3.79, "learning_rate": 2.2901823451274002e-05, "loss": 0.4871, "step": 4488, "task_loss": 1.112866997718811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24539028108119965, "epoch": 3.79, "learning_rate": 2.2895785533148172e-05, "loss": 0.3044, "step": 4489, "task_loss": 0.4485609233379364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3767116069793701, "epoch": 3.79, "learning_rate": 2.2889747615022343e-05, "loss": 0.4163, "step": 4490, "task_loss": 0.8184400796890259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25703781843185425, "epoch": 3.8, "learning_rate": 2.288370969689651e-05, "loss": 0.3479, "step": 4491, "task_loss": 0.6719023585319519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37930458784103394, "epoch": 3.8, "learning_rate": 2.287767177877068e-05, "loss": 0.4488, "step": 4492, "task_loss": 0.9392268061637878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5971359610557556, "epoch": 3.8, "learning_rate": 2.287163386064485e-05, "loss": 0.4413, "step": 4493, "task_loss": 1.12491774559021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3376919627189636, "epoch": 3.8, "learning_rate": 2.286559594251902e-05, "loss": 0.4294, "step": 4494, "task_loss": 0.682693600654602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34694617986679077, "epoch": 3.8, "learning_rate": 2.285955802439319e-05, "loss": 0.2941, "step": 4495, "task_loss": 0.5613043308258057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3248623311519623, "epoch": 3.8, "learning_rate": 2.285352010626736e-05, "loss": 0.3678, "step": 4496, "task_loss": 0.8972628712654114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3311727046966553, "epoch": 3.8, "learning_rate": 2.284748218814153e-05, "loss": 0.4019, "step": 4497, "task_loss": 0.48966822028160095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31371796131134033, "epoch": 3.8, "learning_rate": 2.28414442700157e-05, "loss": 0.2784, "step": 4498, "task_loss": 0.2682132124900818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2827150821685791, "epoch": 3.8, "learning_rate": 2.2835406351889868e-05, "loss": 0.3914, "step": 4499, "task_loss": 0.15594641864299774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3399125337600708, "epoch": 3.8, "learning_rate": 2.282936843376404e-05, "loss": 0.4632, "step": 4500, "task_loss": 1.2824560403823853 }, { "epoch": 3.8, "eval_accuracy": 0.9117623762376238, "eval_loss": 0.26523923873901367, "eval_runtime": 319.6136, "eval_samples_per_second": 79.002, "eval_steps_per_second": 0.619, "step": 4500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49121779203414917, "epoch": 3.8, "learning_rate": 2.282333051563821e-05, "loss": 0.4234, "step": 4501, "task_loss": 0.3431094288825989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40133821964263916, "epoch": 3.81, "learning_rate": 2.281729259751238e-05, "loss": 0.4605, "step": 4502, "task_loss": 0.25890740752220154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37246033549308777, "epoch": 3.81, "learning_rate": 2.2811254679386547e-05, "loss": 0.3379, "step": 4503, "task_loss": 0.44015491008758545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32712459564208984, "epoch": 3.81, "learning_rate": 2.2805216761260718e-05, "loss": 0.3606, "step": 4504, "task_loss": 0.25454261898994446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22644606232643127, "epoch": 3.81, "learning_rate": 2.2799178843134888e-05, "loss": 0.4313, "step": 4505, "task_loss": 0.49812236428260803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3885538578033447, "epoch": 3.81, "learning_rate": 2.279314092500906e-05, "loss": 0.4541, "step": 4506, "task_loss": 1.1067016124725342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5814234018325806, "epoch": 3.81, "learning_rate": 2.278710300688323e-05, "loss": 0.3742, "step": 4507, "task_loss": 1.3348160982131958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18660926818847656, "epoch": 3.81, "learning_rate": 2.2781065088757396e-05, "loss": 0.3376, "step": 4508, "task_loss": 0.5666822195053101 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49987122416496277, "epoch": 3.81, "learning_rate": 2.2775027170631567e-05, "loss": 0.4515, "step": 4509, "task_loss": 0.22444701194763184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7499920129776001, "epoch": 3.81, "learning_rate": 2.2768989252505738e-05, "loss": 0.4514, "step": 4510, "task_loss": 1.5928995609283447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6692479252815247, "epoch": 3.81, "learning_rate": 2.2762951334379905e-05, "loss": 0.5026, "step": 4511, "task_loss": 0.5001580119132996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5734476447105408, "epoch": 3.81, "learning_rate": 2.275691341625408e-05, "loss": 0.4368, "step": 4512, "task_loss": 0.48190340399742126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20368336141109467, "epoch": 3.81, "learning_rate": 2.2750875498128246e-05, "loss": 0.3416, "step": 4513, "task_loss": 0.46848657727241516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44388407468795776, "epoch": 3.82, "learning_rate": 2.2744837580002416e-05, "loss": 0.4584, "step": 4514, "task_loss": 0.5682546496391296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24185648560523987, "epoch": 3.82, "learning_rate": 2.2738799661876587e-05, "loss": 0.3047, "step": 4515, "task_loss": 0.8054791688919067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2658907175064087, "epoch": 3.82, "learning_rate": 2.2732761743750754e-05, "loss": 0.3501, "step": 4516, "task_loss": 0.16551204025745392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2693837881088257, "epoch": 3.82, "learning_rate": 2.2726723825624928e-05, "loss": 0.3802, "step": 4517, "task_loss": 0.8151431083679199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17076629400253296, "epoch": 3.82, "learning_rate": 2.2720685907499095e-05, "loss": 0.3083, "step": 4518, "task_loss": 0.6143295764923096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35836589336395264, "epoch": 3.82, "learning_rate": 2.2714647989373263e-05, "loss": 0.3702, "step": 4519, "task_loss": 0.5651238560676575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2859874963760376, "epoch": 3.82, "learning_rate": 2.2708610071247436e-05, "loss": 0.3212, "step": 4520, "task_loss": 0.29503804445266724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27763664722442627, "epoch": 3.82, "learning_rate": 2.2702572153121604e-05, "loss": 0.3834, "step": 4521, "task_loss": 1.2678570747375488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4358211159706116, "epoch": 3.82, "learning_rate": 2.2696534234995774e-05, "loss": 0.3472, "step": 4522, "task_loss": 0.07309378683567047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40780240297317505, "epoch": 3.82, "learning_rate": 2.2690496316869945e-05, "loss": 0.4215, "step": 4523, "task_loss": 0.24629633128643036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4086659550666809, "epoch": 3.82, "learning_rate": 2.2684458398744112e-05, "loss": 0.3583, "step": 4524, "task_loss": 0.8471210598945618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4191887080669403, "epoch": 3.82, "learning_rate": 2.2678420480618286e-05, "loss": 0.4002, "step": 4525, "task_loss": 0.3444390594959259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3817553222179413, "epoch": 3.83, "learning_rate": 2.2672382562492453e-05, "loss": 0.4606, "step": 4526, "task_loss": 0.8078684210777283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35381996631622314, "epoch": 3.83, "learning_rate": 2.2666344644366624e-05, "loss": 0.3502, "step": 4527, "task_loss": 0.6209245920181274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4206717610359192, "epoch": 3.83, "learning_rate": 2.2660306726240794e-05, "loss": 0.4381, "step": 4528, "task_loss": 0.9576494693756104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.662473201751709, "epoch": 3.83, "learning_rate": 2.265426880811496e-05, "loss": 0.463, "step": 4529, "task_loss": 0.295690655708313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47787389159202576, "epoch": 3.83, "learning_rate": 2.2648230889989132e-05, "loss": 0.4664, "step": 4530, "task_loss": 0.2990244925022125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5561640858650208, "epoch": 3.83, "learning_rate": 2.2642192971863303e-05, "loss": 0.502, "step": 4531, "task_loss": 1.0383234024047852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20464058220386505, "epoch": 3.83, "learning_rate": 2.2636155053737473e-05, "loss": 0.4393, "step": 4532, "task_loss": 0.0776558518409729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5798423290252686, "epoch": 3.83, "learning_rate": 2.263011713561164e-05, "loss": 0.4501, "step": 4533, "task_loss": 0.624987006187439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5466371178627014, "epoch": 3.83, "learning_rate": 2.262407921748581e-05, "loss": 0.4311, "step": 4534, "task_loss": 0.3565341830253601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4459155797958374, "epoch": 3.83, "learning_rate": 2.261804129935998e-05, "loss": 0.4891, "step": 4535, "task_loss": 1.56988525390625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2268802523612976, "epoch": 3.83, "learning_rate": 2.2612003381234152e-05, "loss": 0.294, "step": 4536, "task_loss": 0.3589562177658081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4136229455471039, "epoch": 3.83, "learning_rate": 2.2605965463108323e-05, "loss": 0.36, "step": 4537, "task_loss": 0.8117765188217163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30540794134140015, "epoch": 3.84, "learning_rate": 2.259992754498249e-05, "loss": 0.4004, "step": 4538, "task_loss": 0.4380282461643219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42063724994659424, "epoch": 3.84, "learning_rate": 2.259388962685666e-05, "loss": 0.4358, "step": 4539, "task_loss": 1.085681438446045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2509828805923462, "epoch": 3.84, "learning_rate": 2.258785170873083e-05, "loss": 0.4032, "step": 4540, "task_loss": 0.7311223149299622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2738845944404602, "epoch": 3.84, "learning_rate": 2.2581813790604998e-05, "loss": 0.3446, "step": 4541, "task_loss": 0.4651325047016144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.464905321598053, "epoch": 3.84, "learning_rate": 2.2575775872479172e-05, "loss": 0.377, "step": 4542, "task_loss": 0.12570375204086304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45459067821502686, "epoch": 3.84, "learning_rate": 2.256973795435334e-05, "loss": 0.43, "step": 4543, "task_loss": 0.7658205032348633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3637661039829254, "epoch": 3.84, "learning_rate": 2.256370003622751e-05, "loss": 0.4863, "step": 4544, "task_loss": 0.6486446857452393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33510515093803406, "epoch": 3.84, "learning_rate": 2.255766211810168e-05, "loss": 0.5027, "step": 4545, "task_loss": 0.16016295552253723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4136231243610382, "epoch": 3.84, "learning_rate": 2.2551624199975848e-05, "loss": 0.4547, "step": 4546, "task_loss": 0.9180144667625427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5331814289093018, "epoch": 3.84, "learning_rate": 2.254558628185002e-05, "loss": 0.5249, "step": 4547, "task_loss": 0.7658040523529053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5446820855140686, "epoch": 3.84, "learning_rate": 2.253954836372419e-05, "loss": 0.4193, "step": 4548, "task_loss": 1.1017026901245117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.323249876499176, "epoch": 3.84, "learning_rate": 2.2533510445598356e-05, "loss": 0.3708, "step": 4549, "task_loss": 0.6945792436599731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5181764960289001, "epoch": 3.85, "learning_rate": 2.252747252747253e-05, "loss": 0.5503, "step": 4550, "task_loss": 0.671029806137085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3329160213470459, "epoch": 3.85, "learning_rate": 2.2521434609346697e-05, "loss": 0.4123, "step": 4551, "task_loss": 0.4562135338783264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29990050196647644, "epoch": 3.85, "learning_rate": 2.2515396691220868e-05, "loss": 0.381, "step": 4552, "task_loss": 0.7497788667678833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38282978534698486, "epoch": 3.85, "learning_rate": 2.2509358773095038e-05, "loss": 0.4421, "step": 4553, "task_loss": 1.1743263006210327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3478439748287201, "epoch": 3.85, "learning_rate": 2.2503320854969205e-05, "loss": 0.3594, "step": 4554, "task_loss": 0.6541143655776978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5587910413742065, "epoch": 3.85, "learning_rate": 2.249728293684338e-05, "loss": 0.5707, "step": 4555, "task_loss": 1.3833286762237549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31117892265319824, "epoch": 3.85, "learning_rate": 2.2491245018717547e-05, "loss": 0.3678, "step": 4556, "task_loss": 0.22123612463474274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5289948582649231, "epoch": 3.85, "learning_rate": 2.2485207100591717e-05, "loss": 0.3726, "step": 4557, "task_loss": 0.41705721616744995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23962008953094482, "epoch": 3.85, "learning_rate": 2.2479169182465888e-05, "loss": 0.3683, "step": 4558, "task_loss": 0.1589265614748001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35992154479026794, "epoch": 3.85, "learning_rate": 2.2473131264340055e-05, "loss": 0.4638, "step": 4559, "task_loss": 0.7437750697135925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5524823665618896, "epoch": 3.85, "learning_rate": 2.2467093346214225e-05, "loss": 0.5519, "step": 4560, "task_loss": 1.2695260047912598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2544724643230438, "epoch": 3.85, "learning_rate": 2.2461055428088396e-05, "loss": 0.3513, "step": 4561, "task_loss": 0.2762516438961029 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4712927043437958, "epoch": 3.86, "learning_rate": 2.2455017509962567e-05, "loss": 0.361, "step": 4562, "task_loss": 0.24358876049518585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43436428904533386, "epoch": 3.86, "learning_rate": 2.2448979591836737e-05, "loss": 0.4755, "step": 4563, "task_loss": 0.5310885906219482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32535743713378906, "epoch": 3.86, "learning_rate": 2.2442941673710904e-05, "loss": 0.3652, "step": 4564, "task_loss": 0.26494812965393066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16143882274627686, "epoch": 3.86, "learning_rate": 2.2436903755585075e-05, "loss": 0.5311, "step": 4565, "task_loss": 0.025650667026638985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25477030873298645, "epoch": 3.86, "learning_rate": 2.2430865837459245e-05, "loss": 0.4935, "step": 4566, "task_loss": 0.42623433470726013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39082804322242737, "epoch": 3.86, "learning_rate": 2.2424827919333416e-05, "loss": 0.3894, "step": 4567, "task_loss": 0.2637755870819092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33813416957855225, "epoch": 3.86, "learning_rate": 2.2418790001207583e-05, "loss": 0.4258, "step": 4568, "task_loss": 0.5402578115463257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1429792046546936, "epoch": 3.86, "learning_rate": 2.2412752083081754e-05, "loss": 0.3804, "step": 4569, "task_loss": 0.04993908852338791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47732698917388916, "epoch": 3.86, "learning_rate": 2.2406714164955924e-05, "loss": 0.4968, "step": 4570, "task_loss": 0.5581582188606262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27817824482917786, "epoch": 3.86, "learning_rate": 2.2400676246830095e-05, "loss": 0.3638, "step": 4571, "task_loss": 0.8840863704681396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3191303014755249, "epoch": 3.86, "learning_rate": 2.2394638328704266e-05, "loss": 0.4755, "step": 4572, "task_loss": 0.2756945490837097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41025930643081665, "epoch": 3.87, "learning_rate": 2.2388600410578433e-05, "loss": 0.4996, "step": 4573, "task_loss": 0.9710268974304199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31178176403045654, "epoch": 3.87, "learning_rate": 2.2382562492452603e-05, "loss": 0.4869, "step": 4574, "task_loss": 0.6711894273757935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5178455710411072, "epoch": 3.87, "learning_rate": 2.2376524574326774e-05, "loss": 0.3906, "step": 4575, "task_loss": 0.48359864950180054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25223997235298157, "epoch": 3.87, "learning_rate": 2.237048665620094e-05, "loss": 0.3706, "step": 4576, "task_loss": 0.6418231725692749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39925166964530945, "epoch": 3.87, "learning_rate": 2.2364448738075115e-05, "loss": 0.4058, "step": 4577, "task_loss": 0.7003591656684875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31899988651275635, "epoch": 3.87, "learning_rate": 2.2358410819949282e-05, "loss": 0.3293, "step": 4578, "task_loss": 0.675621747970581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4933909773826599, "epoch": 3.87, "learning_rate": 2.2352372901823453e-05, "loss": 0.4695, "step": 4579, "task_loss": 1.1559746265411377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22669415175914764, "epoch": 3.87, "learning_rate": 2.2346334983697623e-05, "loss": 0.4922, "step": 4580, "task_loss": 0.510589063167572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5563425421714783, "epoch": 3.87, "learning_rate": 2.234029706557179e-05, "loss": 0.4182, "step": 4581, "task_loss": 0.1703505516052246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4804747700691223, "epoch": 3.87, "learning_rate": 2.2334259147445964e-05, "loss": 0.5512, "step": 4582, "task_loss": 0.16490992903709412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5403862595558167, "epoch": 3.87, "learning_rate": 2.232822122932013e-05, "loss": 0.3418, "step": 4583, "task_loss": 0.33137083053588867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49916717410087585, "epoch": 3.87, "learning_rate": 2.23221833111943e-05, "loss": 0.4687, "step": 4584, "task_loss": 0.5807883739471436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29574957489967346, "epoch": 3.88, "learning_rate": 2.2316145393068473e-05, "loss": 0.4038, "step": 4585, "task_loss": 0.9633548855781555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8031378984451294, "epoch": 3.88, "learning_rate": 2.231010747494264e-05, "loss": 0.4962, "step": 4586, "task_loss": 0.7583884596824646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3305168151855469, "epoch": 3.88, "learning_rate": 2.230406955681681e-05, "loss": 0.4431, "step": 4587, "task_loss": 0.04671880230307579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47432243824005127, "epoch": 3.88, "learning_rate": 2.229803163869098e-05, "loss": 0.3931, "step": 4588, "task_loss": 0.4737328290939331 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4902791380882263, "epoch": 3.88, "learning_rate": 2.2291993720565148e-05, "loss": 0.4279, "step": 4589, "task_loss": 1.4560551643371582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49143290519714355, "epoch": 3.88, "learning_rate": 2.2285955802439322e-05, "loss": 0.5242, "step": 4590, "task_loss": 0.8719483017921448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3646913468837738, "epoch": 3.88, "learning_rate": 2.227991788431349e-05, "loss": 0.3952, "step": 4591, "task_loss": 1.1548696756362915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.512373685836792, "epoch": 3.88, "learning_rate": 2.227387996618766e-05, "loss": 0.556, "step": 4592, "task_loss": 1.4534318447113037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5748465061187744, "epoch": 3.88, "learning_rate": 2.226784204806183e-05, "loss": 0.3946, "step": 4593, "task_loss": 1.2464574575424194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7554500699043274, "epoch": 3.88, "learning_rate": 2.2261804129935998e-05, "loss": 0.508, "step": 4594, "task_loss": 1.1655534505844116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3592744469642639, "epoch": 3.88, "learning_rate": 2.225576621181017e-05, "loss": 0.4757, "step": 4595, "task_loss": 0.9114640951156616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44031283259391785, "epoch": 3.88, "learning_rate": 2.224972829368434e-05, "loss": 0.4786, "step": 4596, "task_loss": 1.7737435102462769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7256303429603577, "epoch": 3.89, "learning_rate": 2.224369037555851e-05, "loss": 0.4396, "step": 4597, "task_loss": 0.548713743686676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23623117804527283, "epoch": 3.89, "learning_rate": 2.2237652457432677e-05, "loss": 0.3654, "step": 4598, "task_loss": 0.10148685425519943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7915703058242798, "epoch": 3.89, "learning_rate": 2.2231614539306847e-05, "loss": 0.4273, "step": 4599, "task_loss": 0.533987820148468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46658575534820557, "epoch": 3.89, "learning_rate": 2.2225576621181018e-05, "loss": 0.3238, "step": 4600, "task_loss": 0.9711079597473145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4219766855239868, "epoch": 3.89, "learning_rate": 2.221953870305519e-05, "loss": 0.3797, "step": 4601, "task_loss": 0.44473257660865784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3183036148548126, "epoch": 3.89, "learning_rate": 2.221350078492936e-05, "loss": 0.4221, "step": 4602, "task_loss": 0.35986870527267456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30251285433769226, "epoch": 3.89, "learning_rate": 2.2207462866803526e-05, "loss": 0.4909, "step": 4603, "task_loss": 0.08503981679677963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5725367665290833, "epoch": 3.89, "learning_rate": 2.2201424948677697e-05, "loss": 0.4222, "step": 4604, "task_loss": 1.1389788389205933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4655366539955139, "epoch": 3.89, "learning_rate": 2.2195387030551867e-05, "loss": 0.4257, "step": 4605, "task_loss": 1.0703727006912231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3696538209915161, "epoch": 3.89, "learning_rate": 2.2189349112426034e-05, "loss": 0.5288, "step": 4606, "task_loss": 0.3168405592441559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5059281587600708, "epoch": 3.89, "learning_rate": 2.218331119430021e-05, "loss": 0.4817, "step": 4607, "task_loss": 0.4364737570285797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2418578416109085, "epoch": 3.89, "learning_rate": 2.2177273276174376e-05, "loss": 0.4173, "step": 4608, "task_loss": 0.2359144687652588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38668906688690186, "epoch": 3.9, "learning_rate": 2.2171235358048546e-05, "loss": 0.4174, "step": 4609, "task_loss": 0.5336436629295349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3685420751571655, "epoch": 3.9, "learning_rate": 2.2165197439922717e-05, "loss": 0.4571, "step": 4610, "task_loss": 0.6952165961265564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4915848970413208, "epoch": 3.9, "learning_rate": 2.2159159521796884e-05, "loss": 0.3862, "step": 4611, "task_loss": 0.7067809700965881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5225868225097656, "epoch": 3.9, "learning_rate": 2.2153121603671058e-05, "loss": 0.4001, "step": 4612, "task_loss": 0.517489492893219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2597808837890625, "epoch": 3.9, "learning_rate": 2.2147083685545225e-05, "loss": 0.3737, "step": 4613, "task_loss": 0.3930809497833252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3024147152900696, "epoch": 3.9, "learning_rate": 2.2141045767419392e-05, "loss": 0.4497, "step": 4614, "task_loss": 0.516047477722168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4549037218093872, "epoch": 3.9, "learning_rate": 2.2135007849293566e-05, "loss": 0.4064, "step": 4615, "task_loss": 0.5303601026535034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4413997530937195, "epoch": 3.9, "learning_rate": 2.2128969931167733e-05, "loss": 0.4991, "step": 4616, "task_loss": 0.4768560230731964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29474443197250366, "epoch": 3.9, "learning_rate": 2.2122932013041904e-05, "loss": 0.4747, "step": 4617, "task_loss": 1.0565974712371826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3030339181423187, "epoch": 3.9, "learning_rate": 2.2116894094916075e-05, "loss": 0.4593, "step": 4618, "task_loss": 0.7938761711120605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2412015199661255, "epoch": 3.9, "learning_rate": 2.2110856176790242e-05, "loss": 0.4054, "step": 4619, "task_loss": 0.7590383887290955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42325860261917114, "epoch": 3.9, "learning_rate": 2.2104818258664416e-05, "loss": 0.4034, "step": 4620, "task_loss": 1.0260077714920044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6733008027076721, "epoch": 3.91, "learning_rate": 2.2098780340538583e-05, "loss": 0.5542, "step": 4621, "task_loss": 0.171078160405159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.627395749092102, "epoch": 3.91, "learning_rate": 2.2092742422412753e-05, "loss": 0.5866, "step": 4622, "task_loss": 0.8791472911834717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3768913745880127, "epoch": 3.91, "learning_rate": 2.2086704504286924e-05, "loss": 0.5902, "step": 4623, "task_loss": 1.2524511814117432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42943039536476135, "epoch": 3.91, "learning_rate": 2.208066658616109e-05, "loss": 0.4062, "step": 4624, "task_loss": 0.5341850519180298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6393450498580933, "epoch": 3.91, "learning_rate": 2.2074628668035262e-05, "loss": 0.5676, "step": 4625, "task_loss": 1.0349781513214111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26923444867134094, "epoch": 3.91, "learning_rate": 2.2068590749909432e-05, "loss": 0.348, "step": 4626, "task_loss": 0.5974827408790588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7018387317657471, "epoch": 3.91, "learning_rate": 2.2062552831783603e-05, "loss": 0.5294, "step": 4627, "task_loss": 0.7117537260055542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4203487038612366, "epoch": 3.91, "learning_rate": 2.2056514913657773e-05, "loss": 0.3979, "step": 4628, "task_loss": 1.3005664348602295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33451342582702637, "epoch": 3.91, "learning_rate": 2.205047699553194e-05, "loss": 0.4655, "step": 4629, "task_loss": 1.51963210105896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6836137771606445, "epoch": 3.91, "learning_rate": 2.204443907740611e-05, "loss": 0.442, "step": 4630, "task_loss": 0.9047869443893433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3805224895477295, "epoch": 3.91, "learning_rate": 2.2038401159280282e-05, "loss": 0.4554, "step": 4631, "task_loss": 0.4351482391357422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4780353307723999, "epoch": 3.91, "learning_rate": 2.2032363241154452e-05, "loss": 0.4637, "step": 4632, "task_loss": 1.0591700077056885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34727558493614197, "epoch": 3.92, "learning_rate": 2.202632532302862e-05, "loss": 0.4314, "step": 4633, "task_loss": 0.7087591886520386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21485263109207153, "epoch": 3.92, "learning_rate": 2.202028740490279e-05, "loss": 0.3874, "step": 4634, "task_loss": 0.09541164338588715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3084535300731659, "epoch": 3.92, "learning_rate": 2.201424948677696e-05, "loss": 0.3994, "step": 4635, "task_loss": 0.740850567817688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4362969398498535, "epoch": 3.92, "learning_rate": 2.200821156865113e-05, "loss": 0.4549, "step": 4636, "task_loss": 0.19366219639778137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48312973976135254, "epoch": 3.92, "learning_rate": 2.2002173650525302e-05, "loss": 0.4452, "step": 4637, "task_loss": 1.2608119249343872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3753194808959961, "epoch": 3.92, "learning_rate": 2.199613573239947e-05, "loss": 0.4438, "step": 4638, "task_loss": 0.4658428728580475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18991604447364807, "epoch": 3.92, "learning_rate": 2.199009781427364e-05, "loss": 0.442, "step": 4639, "task_loss": 0.032667629420757294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2879599332809448, "epoch": 3.92, "learning_rate": 2.198405989614781e-05, "loss": 0.2711, "step": 4640, "task_loss": 0.5322995781898499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25245946645736694, "epoch": 3.92, "learning_rate": 2.1978021978021977e-05, "loss": 0.3516, "step": 4641, "task_loss": 0.25367921590805054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21943213045597076, "epoch": 3.92, "learning_rate": 2.197198405989615e-05, "loss": 0.4394, "step": 4642, "task_loss": 0.022075967863202095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23929435014724731, "epoch": 3.92, "learning_rate": 2.196594614177032e-05, "loss": 0.62, "step": 4643, "task_loss": 0.718351423740387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2951848804950714, "epoch": 3.93, "learning_rate": 2.195990822364449e-05, "loss": 0.2928, "step": 4644, "task_loss": 0.163901224732399 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.584557831287384, "epoch": 3.93, "learning_rate": 2.195387030551866e-05, "loss": 0.4777, "step": 4645, "task_loss": 0.5103973746299744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2727009654045105, "epoch": 3.93, "learning_rate": 2.1947832387392827e-05, "loss": 0.4024, "step": 4646, "task_loss": 0.13429082930088043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34626880288124084, "epoch": 3.93, "learning_rate": 2.1941794469267e-05, "loss": 0.4731, "step": 4647, "task_loss": 1.66986882686615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36264175176620483, "epoch": 3.93, "learning_rate": 2.1935756551141168e-05, "loss": 0.4523, "step": 4648, "task_loss": 0.8390028476715088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42440491914749146, "epoch": 3.93, "learning_rate": 2.1929718633015335e-05, "loss": 0.4022, "step": 4649, "task_loss": 0.19969137012958527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3163597285747528, "epoch": 3.93, "learning_rate": 2.192368071488951e-05, "loss": 0.3694, "step": 4650, "task_loss": 0.1281253695487976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42858433723449707, "epoch": 3.93, "learning_rate": 2.1917642796763676e-05, "loss": 0.4559, "step": 4651, "task_loss": 1.767720103263855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25377023220062256, "epoch": 3.93, "learning_rate": 2.1911604878637847e-05, "loss": 0.3746, "step": 4652, "task_loss": 0.0915948674082756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.555628776550293, "epoch": 3.93, "learning_rate": 2.1905566960512017e-05, "loss": 0.5057, "step": 4653, "task_loss": 0.8179232478141785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5073381662368774, "epoch": 3.93, "learning_rate": 2.1899529042386185e-05, "loss": 0.5697, "step": 4654, "task_loss": 0.31872865557670593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5210067629814148, "epoch": 3.93, "learning_rate": 2.189349112426036e-05, "loss": 0.4063, "step": 4655, "task_loss": 0.3142015039920807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3990045487880707, "epoch": 3.94, "learning_rate": 2.1887453206134526e-05, "loss": 0.4105, "step": 4656, "task_loss": 0.7057106494903564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2722366750240326, "epoch": 3.94, "learning_rate": 2.1881415288008696e-05, "loss": 0.3008, "step": 4657, "task_loss": 0.19812200963497162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44564685225486755, "epoch": 3.94, "learning_rate": 2.1875377369882867e-05, "loss": 0.3661, "step": 4658, "task_loss": 1.083390474319458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46544405817985535, "epoch": 3.94, "learning_rate": 2.1869339451757034e-05, "loss": 0.3567, "step": 4659, "task_loss": 0.12386466562747955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47965508699417114, "epoch": 3.94, "learning_rate": 2.1863301533631205e-05, "loss": 0.5314, "step": 4660, "task_loss": 1.0370897054672241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3200623393058777, "epoch": 3.94, "learning_rate": 2.1857263615505375e-05, "loss": 0.4028, "step": 4661, "task_loss": 1.489933729171753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3170211613178253, "epoch": 3.94, "learning_rate": 2.1851225697379546e-05, "loss": 0.4028, "step": 4662, "task_loss": 0.10899292677640915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4000930190086365, "epoch": 3.94, "learning_rate": 2.1845187779253713e-05, "loss": 0.3914, "step": 4663, "task_loss": 0.4306117594242096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34254106879234314, "epoch": 3.94, "learning_rate": 2.1839149861127884e-05, "loss": 0.3776, "step": 4664, "task_loss": 0.2512938380241394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36286503076553345, "epoch": 3.94, "learning_rate": 2.1833111943002054e-05, "loss": 0.3457, "step": 4665, "task_loss": 0.7918552756309509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3960860073566437, "epoch": 3.94, "learning_rate": 2.1827074024876225e-05, "loss": 0.4739, "step": 4666, "task_loss": 0.44808948040008545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4474877119064331, "epoch": 3.94, "learning_rate": 2.1821036106750395e-05, "loss": 0.4728, "step": 4667, "task_loss": 0.8038364052772522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38026177883148193, "epoch": 3.95, "learning_rate": 2.1814998188624562e-05, "loss": 0.4703, "step": 4668, "task_loss": 0.7404904961585999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5454308986663818, "epoch": 3.95, "learning_rate": 2.1808960270498733e-05, "loss": 0.5313, "step": 4669, "task_loss": 1.0469202995300293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1729302704334259, "epoch": 3.95, "learning_rate": 2.1802922352372904e-05, "loss": 0.2601, "step": 4670, "task_loss": 0.3975571095943451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41540831327438354, "epoch": 3.95, "learning_rate": 2.179688443424707e-05, "loss": 0.3333, "step": 4671, "task_loss": 0.5493925213813782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19419467449188232, "epoch": 3.95, "learning_rate": 2.179084651612124e-05, "loss": 0.377, "step": 4672, "task_loss": 0.6547345519065857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5346653461456299, "epoch": 3.95, "learning_rate": 2.1784808597995412e-05, "loss": 0.6003, "step": 4673, "task_loss": 0.8171154260635376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.388639897108078, "epoch": 3.95, "learning_rate": 2.1778770679869582e-05, "loss": 0.5365, "step": 4674, "task_loss": 0.6989145278930664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5683828592300415, "epoch": 3.95, "learning_rate": 2.1772732761743753e-05, "loss": 0.5447, "step": 4675, "task_loss": 0.484783798456192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3055304288864136, "epoch": 3.95, "learning_rate": 2.176669484361792e-05, "loss": 0.3797, "step": 4676, "task_loss": 0.820181667804718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3012204170227051, "epoch": 3.95, "learning_rate": 2.176065692549209e-05, "loss": 0.2757, "step": 4677, "task_loss": 0.5573286414146423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49892643094062805, "epoch": 3.95, "learning_rate": 2.175461900736626e-05, "loss": 0.4072, "step": 4678, "task_loss": 1.1113855838775635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4156685471534729, "epoch": 3.95, "learning_rate": 2.174858108924043e-05, "loss": 0.4498, "step": 4679, "task_loss": 0.902911365032196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.512830376625061, "epoch": 3.96, "learning_rate": 2.1742543171114602e-05, "loss": 0.4303, "step": 4680, "task_loss": 0.6050022840499878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3768694996833801, "epoch": 3.96, "learning_rate": 2.173650525298877e-05, "loss": 0.4587, "step": 4681, "task_loss": 0.267460435628891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.238009512424469, "epoch": 3.96, "learning_rate": 2.173046733486294e-05, "loss": 0.3519, "step": 4682, "task_loss": 0.37932252883911133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1914059817790985, "epoch": 3.96, "learning_rate": 2.172442941673711e-05, "loss": 0.3301, "step": 4683, "task_loss": 0.37301287055015564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6349960565567017, "epoch": 3.96, "learning_rate": 2.1718391498611278e-05, "loss": 0.4848, "step": 4684, "task_loss": 1.4277560710906982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16877023875713348, "epoch": 3.96, "learning_rate": 2.1712353580485452e-05, "loss": 0.2898, "step": 4685, "task_loss": 0.5351516008377075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3224194645881653, "epoch": 3.96, "learning_rate": 2.170631566235962e-05, "loss": 0.4604, "step": 4686, "task_loss": 1.0188924074172974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.296830415725708, "epoch": 3.96, "learning_rate": 2.1700277744233786e-05, "loss": 0.3027, "step": 4687, "task_loss": 1.141015887260437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3733927011489868, "epoch": 3.96, "learning_rate": 2.169423982610796e-05, "loss": 0.439, "step": 4688, "task_loss": 0.09814025461673737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3740171194076538, "epoch": 3.96, "learning_rate": 2.1688201907982127e-05, "loss": 0.5153, "step": 4689, "task_loss": 0.8606243133544922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4160200357437134, "epoch": 3.96, "learning_rate": 2.1682163989856298e-05, "loss": 0.4343, "step": 4690, "task_loss": 0.1901693493127823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5708118677139282, "epoch": 3.96, "learning_rate": 2.167612607173047e-05, "loss": 0.428, "step": 4691, "task_loss": 1.4941189289093018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.513900876045227, "epoch": 3.97, "learning_rate": 2.1670088153604636e-05, "loss": 0.5013, "step": 4692, "task_loss": 0.5330698490142822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44221436977386475, "epoch": 3.97, "learning_rate": 2.166405023547881e-05, "loss": 0.4749, "step": 4693, "task_loss": 0.9777353405952454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28001007437705994, "epoch": 3.97, "learning_rate": 2.1658012317352977e-05, "loss": 0.391, "step": 4694, "task_loss": 0.8125237226486206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4626730680465698, "epoch": 3.97, "learning_rate": 2.1651974399227148e-05, "loss": 0.4058, "step": 4695, "task_loss": 0.38860654830932617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5754708051681519, "epoch": 3.97, "learning_rate": 2.1645936481101318e-05, "loss": 0.4347, "step": 4696, "task_loss": 0.9418672919273376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2598324418067932, "epoch": 3.97, "learning_rate": 2.1639898562975485e-05, "loss": 0.3022, "step": 4697, "task_loss": 0.3891436457633972 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3571735620498657, "epoch": 3.97, "learning_rate": 2.1633860644849656e-05, "loss": 0.3236, "step": 4698, "task_loss": 1.486340045928955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3487718105316162, "epoch": 3.97, "learning_rate": 2.1627822726723826e-05, "loss": 0.3462, "step": 4699, "task_loss": 0.5398554801940918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16560940444469452, "epoch": 3.97, "learning_rate": 2.1621784808597997e-05, "loss": 0.2691, "step": 4700, "task_loss": 0.6001610159873962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3898424804210663, "epoch": 3.97, "learning_rate": 2.1615746890472168e-05, "loss": 0.4659, "step": 4701, "task_loss": 0.7700607776641846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31866583228111267, "epoch": 3.97, "learning_rate": 2.1609708972346335e-05, "loss": 0.4049, "step": 4702, "task_loss": 0.2861076295375824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3652113378047943, "epoch": 3.97, "learning_rate": 2.1603671054220505e-05, "loss": 0.4435, "step": 4703, "task_loss": 0.22800223529338837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28986743092536926, "epoch": 3.98, "learning_rate": 2.1597633136094676e-05, "loss": 0.459, "step": 4704, "task_loss": 0.2871626913547516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6441648006439209, "epoch": 3.98, "learning_rate": 2.1591595217968846e-05, "loss": 0.4085, "step": 4705, "task_loss": 0.7876819968223572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34083619713783264, "epoch": 3.98, "learning_rate": 2.1585557299843014e-05, "loss": 0.4433, "step": 4706, "task_loss": 0.6276705861091614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3427461087703705, "epoch": 3.98, "learning_rate": 2.1579519381717184e-05, "loss": 0.3654, "step": 4707, "task_loss": 0.367641419172287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44094499945640564, "epoch": 3.98, "learning_rate": 2.1573481463591355e-05, "loss": 0.4245, "step": 4708, "task_loss": 0.5510439872741699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6008228063583374, "epoch": 3.98, "learning_rate": 2.1567443545465525e-05, "loss": 0.5283, "step": 4709, "task_loss": 0.6261876225471497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32644906640052795, "epoch": 3.98, "learning_rate": 2.1561405627339696e-05, "loss": 0.455, "step": 4710, "task_loss": 0.6956600546836853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27282220125198364, "epoch": 3.98, "learning_rate": 2.1555367709213863e-05, "loss": 0.3175, "step": 4711, "task_loss": 0.3560488224029541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4292609691619873, "epoch": 3.98, "learning_rate": 2.1549329791088034e-05, "loss": 0.5332, "step": 4712, "task_loss": 0.9570555686950684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1858888417482376, "epoch": 3.98, "learning_rate": 2.1543291872962204e-05, "loss": 0.3142, "step": 4713, "task_loss": 0.4318126440048218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3430553078651428, "epoch": 3.98, "learning_rate": 2.153725395483637e-05, "loss": 0.5502, "step": 4714, "task_loss": 0.44477319717407227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4054073691368103, "epoch": 3.99, "learning_rate": 2.1531216036710545e-05, "loss": 0.3664, "step": 4715, "task_loss": 0.40322378277778625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3309902548789978, "epoch": 3.99, "learning_rate": 2.1525178118584713e-05, "loss": 0.4688, "step": 4716, "task_loss": 0.15923725068569183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7062085866928101, "epoch": 3.99, "learning_rate": 2.1519140200458883e-05, "loss": 0.4676, "step": 4717, "task_loss": 0.4629147946834564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31093737483024597, "epoch": 3.99, "learning_rate": 2.1513102282333054e-05, "loss": 0.3882, "step": 4718, "task_loss": 0.053054023534059525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2554779350757599, "epoch": 3.99, "learning_rate": 2.150706436420722e-05, "loss": 0.4398, "step": 4719, "task_loss": 0.2713060975074768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5832722187042236, "epoch": 3.99, "learning_rate": 2.1501026446081395e-05, "loss": 0.4559, "step": 4720, "task_loss": 0.7865653038024902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25730100274086, "epoch": 3.99, "learning_rate": 2.1494988527955562e-05, "loss": 0.3904, "step": 4721, "task_loss": 0.5570638179779053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9693917036056519, "epoch": 3.99, "learning_rate": 2.148895060982973e-05, "loss": 0.5095, "step": 4722, "task_loss": 1.0933961868286133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2750369608402252, "epoch": 3.99, "learning_rate": 2.1482912691703903e-05, "loss": 0.4141, "step": 4723, "task_loss": 0.5941587090492249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3395710587501526, "epoch": 3.99, "learning_rate": 2.147687477357807e-05, "loss": 0.4595, "step": 4724, "task_loss": 0.5399947166442871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4669027626514435, "epoch": 3.99, "learning_rate": 2.147083685545224e-05, "loss": 0.4592, "step": 4725, "task_loss": 0.34324750304222107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31822261214256287, "epoch": 3.99, "learning_rate": 2.146479893732641e-05, "loss": 0.3281, "step": 4726, "task_loss": 0.46337124705314636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6072922945022583, "epoch": 4.0, "learning_rate": 2.145876101920058e-05, "loss": 0.4961, "step": 4727, "task_loss": 1.1463346481323242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3493722081184387, "epoch": 4.0, "learning_rate": 2.145272310107475e-05, "loss": 0.4666, "step": 4728, "task_loss": 1.3622829914093018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.10110736638307571, "epoch": 4.0, "learning_rate": 2.144668518294892e-05, "loss": 0.4126, "step": 4729, "task_loss": 0.04260345175862312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6699406504631042, "epoch": 4.0, "learning_rate": 2.144064726482309e-05, "loss": 0.6066, "step": 4730, "task_loss": 1.0732773542404175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5026463270187378, "epoch": 4.0, "learning_rate": 2.143460934669726e-05, "loss": 0.4407, "step": 4731, "task_loss": 1.0159082412719727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45547744631767273, "epoch": 4.0, "learning_rate": 2.1428571428571428e-05, "loss": 0.4396, "step": 4732, "task_loss": 0.24755015969276428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4969332814216614, "epoch": 4.0, "learning_rate": 2.14225335104456e-05, "loss": 0.5198, "step": 4733, "task_loss": 0.7957890629768372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36988574266433716, "epoch": 4.0, "learning_rate": 2.141649559231977e-05, "loss": 0.456, "step": 4734, "task_loss": 0.6388328075408936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27283045649528503, "epoch": 4.0, "learning_rate": 2.141045767419394e-05, "loss": 0.3907, "step": 4735, "task_loss": 1.1447242498397827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45001789927482605, "epoch": 4.0, "learning_rate": 2.1404419756068107e-05, "loss": 0.42, "step": 4736, "task_loss": 1.486981749534607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3620765209197998, "epoch": 4.0, "learning_rate": 2.1398381837942278e-05, "loss": 0.3522, "step": 4737, "task_loss": 0.4173358678817749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2579789161682129, "epoch": 4.01, "learning_rate": 2.1392343919816448e-05, "loss": 0.3665, "step": 4738, "task_loss": 0.34482988715171814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2959059774875641, "epoch": 4.01, "learning_rate": 2.138630600169062e-05, "loss": 0.4006, "step": 4739, "task_loss": 0.2030932903289795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23109817504882812, "epoch": 4.01, "learning_rate": 2.138026808356479e-05, "loss": 0.3337, "step": 4740, "task_loss": 0.1525581330060959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35275983810424805, "epoch": 4.01, "learning_rate": 2.1374230165438957e-05, "loss": 0.4535, "step": 4741, "task_loss": 0.13515704870224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6374374032020569, "epoch": 4.01, "learning_rate": 2.1368192247313127e-05, "loss": 0.4295, "step": 4742, "task_loss": 0.08735156059265137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2931504547595978, "epoch": 4.01, "learning_rate": 2.1362154329187298e-05, "loss": 0.3856, "step": 4743, "task_loss": 0.049383748322725296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5102002620697021, "epoch": 4.01, "learning_rate": 2.1356116411061465e-05, "loss": 0.5759, "step": 4744, "task_loss": 0.6683656573295593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22226618230342865, "epoch": 4.01, "learning_rate": 2.135007849293564e-05, "loss": 0.3565, "step": 4745, "task_loss": 0.5296968221664429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3785257637500763, "epoch": 4.01, "learning_rate": 2.1344040574809806e-05, "loss": 0.3094, "step": 4746, "task_loss": 0.12340650707483292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5323532223701477, "epoch": 4.01, "learning_rate": 2.1338002656683977e-05, "loss": 0.509, "step": 4747, "task_loss": 0.1198139637708664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4052533507347107, "epoch": 4.01, "learning_rate": 2.1331964738558147e-05, "loss": 0.5168, "step": 4748, "task_loss": 0.7686903476715088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.9071964025497437, "epoch": 4.01, "learning_rate": 2.1325926820432314e-05, "loss": 0.5263, "step": 4749, "task_loss": 0.7958686351776123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23224696516990662, "epoch": 4.02, "learning_rate": 2.1319888902306488e-05, "loss": 0.3353, "step": 4750, "task_loss": 0.35631856322288513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2811357080936432, "epoch": 4.02, "learning_rate": 2.1313850984180655e-05, "loss": 0.3661, "step": 4751, "task_loss": 0.1331200897693634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22150614857673645, "epoch": 4.02, "learning_rate": 2.1307813066054823e-05, "loss": 0.4882, "step": 4752, "task_loss": 0.6809633374214172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.224320650100708, "epoch": 4.02, "learning_rate": 2.1301775147928997e-05, "loss": 0.4384, "step": 4753, "task_loss": 0.5746221542358398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41746872663497925, "epoch": 4.02, "learning_rate": 2.1295737229803164e-05, "loss": 0.6004, "step": 4754, "task_loss": 0.10069851577281952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4340299665927887, "epoch": 4.02, "learning_rate": 2.1289699311677334e-05, "loss": 0.4569, "step": 4755, "task_loss": 0.64607834815979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24567314982414246, "epoch": 4.02, "learning_rate": 2.1283661393551505e-05, "loss": 0.5082, "step": 4756, "task_loss": 0.4432559609413147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4027273654937744, "epoch": 4.02, "learning_rate": 2.1277623475425672e-05, "loss": 0.3754, "step": 4757, "task_loss": 1.2822325229644775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3494114875793457, "epoch": 4.02, "learning_rate": 2.1271585557299846e-05, "loss": 0.3976, "step": 4758, "task_loss": 0.20238694548606873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28357356786727905, "epoch": 4.02, "learning_rate": 2.1265547639174013e-05, "loss": 0.4119, "step": 4759, "task_loss": 0.7002719640731812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24635612964630127, "epoch": 4.02, "learning_rate": 2.1259509721048184e-05, "loss": 0.3143, "step": 4760, "task_loss": 0.36798933148384094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8468934297561646, "epoch": 4.02, "learning_rate": 2.1253471802922354e-05, "loss": 0.523, "step": 4761, "task_loss": 1.2657352685928345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2950683832168579, "epoch": 4.03, "learning_rate": 2.124743388479652e-05, "loss": 0.448, "step": 4762, "task_loss": 0.8726710081100464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3506834805011749, "epoch": 4.03, "learning_rate": 2.1241395966670692e-05, "loss": 0.3175, "step": 4763, "task_loss": 0.17147406935691833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42140620946884155, "epoch": 4.03, "learning_rate": 2.1235358048544863e-05, "loss": 0.353, "step": 4764, "task_loss": 0.47559815645217896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4257904887199402, "epoch": 4.03, "learning_rate": 2.1229320130419033e-05, "loss": 0.4035, "step": 4765, "task_loss": 0.4095480442047119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2580850124359131, "epoch": 4.03, "learning_rate": 2.1223282212293204e-05, "loss": 0.3536, "step": 4766, "task_loss": 0.7929224967956543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39048779010772705, "epoch": 4.03, "learning_rate": 2.121724429416737e-05, "loss": 0.3086, "step": 4767, "task_loss": 0.10073329508304596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24884000420570374, "epoch": 4.03, "learning_rate": 2.121120637604154e-05, "loss": 0.2617, "step": 4768, "task_loss": 0.4025822579860687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2646413743495941, "epoch": 4.03, "learning_rate": 2.1205168457915712e-05, "loss": 0.3032, "step": 4769, "task_loss": 0.3086329400539398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6487721800804138, "epoch": 4.03, "learning_rate": 2.1199130539789883e-05, "loss": 0.5158, "step": 4770, "task_loss": 0.2515372037887573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28389737010002136, "epoch": 4.03, "learning_rate": 2.119309262166405e-05, "loss": 0.3503, "step": 4771, "task_loss": 0.2240525186061859 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17216986417770386, "epoch": 4.03, "learning_rate": 2.118705470353822e-05, "loss": 0.322, "step": 4772, "task_loss": 0.1966792196035385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4389966130256653, "epoch": 4.03, "learning_rate": 2.118101678541239e-05, "loss": 0.3941, "step": 4773, "task_loss": 0.9364227652549744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25216323137283325, "epoch": 4.04, "learning_rate": 2.117497886728656e-05, "loss": 0.3237, "step": 4774, "task_loss": 0.23817862570285797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30057573318481445, "epoch": 4.04, "learning_rate": 2.1168940949160732e-05, "loss": 0.3439, "step": 4775, "task_loss": 0.42688068747520447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26909083127975464, "epoch": 4.04, "learning_rate": 2.11629030310349e-05, "loss": 0.3377, "step": 4776, "task_loss": 0.41030970215797424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.196682870388031, "epoch": 4.04, "learning_rate": 2.115686511290907e-05, "loss": 0.3616, "step": 4777, "task_loss": 0.7956928610801697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3494042158126831, "epoch": 4.04, "learning_rate": 2.115082719478324e-05, "loss": 0.475, "step": 4778, "task_loss": 0.608128547668457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38667482137680054, "epoch": 4.04, "learning_rate": 2.1144789276657408e-05, "loss": 0.4012, "step": 4779, "task_loss": 0.9762008786201477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2746254503726959, "epoch": 4.04, "learning_rate": 2.113875135853158e-05, "loss": 0.4488, "step": 4780, "task_loss": 0.7842054963111877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34550124406814575, "epoch": 4.04, "learning_rate": 2.113271344040575e-05, "loss": 0.3467, "step": 4781, "task_loss": 0.5102437734603882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.337364137172699, "epoch": 4.04, "learning_rate": 2.112667552227992e-05, "loss": 0.4649, "step": 4782, "task_loss": 0.5045252442359924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39577439427375793, "epoch": 4.04, "learning_rate": 2.112063760415409e-05, "loss": 0.2875, "step": 4783, "task_loss": 0.3474442660808563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2400749772787094, "epoch": 4.04, "learning_rate": 2.1114599686028257e-05, "loss": 0.375, "step": 4784, "task_loss": 0.6512824892997742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27345606684684753, "epoch": 4.04, "learning_rate": 2.110856176790243e-05, "loss": 0.3561, "step": 4785, "task_loss": 0.24921190738677979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4654605984687805, "epoch": 4.05, "learning_rate": 2.11025238497766e-05, "loss": 0.4174, "step": 4786, "task_loss": 0.66752690076828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3968949317932129, "epoch": 4.05, "learning_rate": 2.1096485931650766e-05, "loss": 0.493, "step": 4787, "task_loss": 0.7014756798744202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5827181339263916, "epoch": 4.05, "learning_rate": 2.109044801352494e-05, "loss": 0.427, "step": 4788, "task_loss": 0.9313209056854248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3623863160610199, "epoch": 4.05, "learning_rate": 2.1084410095399107e-05, "loss": 0.3887, "step": 4789, "task_loss": 0.4508783221244812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6286660432815552, "epoch": 4.05, "learning_rate": 2.1078372177273277e-05, "loss": 0.5649, "step": 4790, "task_loss": 0.5072640180587769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1977921426296234, "epoch": 4.05, "learning_rate": 2.1072334259147448e-05, "loss": 0.2944, "step": 4791, "task_loss": 0.1584392637014389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3065502643585205, "epoch": 4.05, "learning_rate": 2.1066296341021615e-05, "loss": 0.3596, "step": 4792, "task_loss": 0.7756547927856445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3805905878543854, "epoch": 4.05, "learning_rate": 2.1060258422895786e-05, "loss": 0.402, "step": 4793, "task_loss": 0.3155575394630432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21535229682922363, "epoch": 4.05, "learning_rate": 2.1054220504769956e-05, "loss": 0.2416, "step": 4794, "task_loss": 0.06583372503519058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24823233485221863, "epoch": 4.05, "learning_rate": 2.1048182586644127e-05, "loss": 0.4178, "step": 4795, "task_loss": 0.049223192036151886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3542630672454834, "epoch": 4.05, "learning_rate": 2.1042144668518297e-05, "loss": 0.3776, "step": 4796, "task_loss": 0.5209024548530579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34688061475753784, "epoch": 4.05, "learning_rate": 2.1036106750392464e-05, "loss": 0.4106, "step": 4797, "task_loss": 0.8220951557159424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23935061693191528, "epoch": 4.06, "learning_rate": 2.1030068832266635e-05, "loss": 0.366, "step": 4798, "task_loss": 0.3952641785144806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.334037721157074, "epoch": 4.06, "learning_rate": 2.1024030914140806e-05, "loss": 0.3876, "step": 4799, "task_loss": 0.5435364842414856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35532110929489136, "epoch": 4.06, "learning_rate": 2.1017992996014976e-05, "loss": 0.3621, "step": 4800, "task_loss": 0.3693119287490845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34680601954460144, "epoch": 4.06, "learning_rate": 2.1011955077889143e-05, "loss": 0.4149, "step": 4801, "task_loss": 0.9417662620544434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2337411642074585, "epoch": 4.06, "learning_rate": 2.1005917159763314e-05, "loss": 0.387, "step": 4802, "task_loss": 0.13075530529022217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15126600861549377, "epoch": 4.06, "learning_rate": 2.0999879241637484e-05, "loss": 0.337, "step": 4803, "task_loss": 0.48792415857315063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3399249315261841, "epoch": 4.06, "learning_rate": 2.0993841323511655e-05, "loss": 0.3738, "step": 4804, "task_loss": 1.0349302291870117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1368466466665268, "epoch": 4.06, "learning_rate": 2.0987803405385826e-05, "loss": 0.3536, "step": 4805, "task_loss": 0.34006643295288086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32521671056747437, "epoch": 4.06, "learning_rate": 2.0981765487259993e-05, "loss": 0.3821, "step": 4806, "task_loss": 0.5966352224349976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5937595367431641, "epoch": 4.06, "learning_rate": 2.0975727569134163e-05, "loss": 0.484, "step": 4807, "task_loss": 0.2947319746017456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35564887523651123, "epoch": 4.06, "learning_rate": 2.0969689651008334e-05, "loss": 0.4772, "step": 4808, "task_loss": 0.8006790280342102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3680543303489685, "epoch": 4.07, "learning_rate": 2.09636517328825e-05, "loss": 0.3621, "step": 4809, "task_loss": 1.0394285917282104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34666895866394043, "epoch": 4.07, "learning_rate": 2.0957613814756675e-05, "loss": 0.3873, "step": 4810, "task_loss": 0.435205340385437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17074444890022278, "epoch": 4.07, "learning_rate": 2.0951575896630842e-05, "loss": 0.4318, "step": 4811, "task_loss": 0.4964938163757324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34322047233581543, "epoch": 4.07, "learning_rate": 2.0945537978505013e-05, "loss": 0.3703, "step": 4812, "task_loss": 1.3383314609527588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2519175708293915, "epoch": 4.07, "learning_rate": 2.0939500060379183e-05, "loss": 0.4248, "step": 4813, "task_loss": 0.8056415915489197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30481213331222534, "epoch": 4.07, "learning_rate": 2.093346214225335e-05, "loss": 0.3886, "step": 4814, "task_loss": 0.7523413300514221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4313885569572449, "epoch": 4.07, "learning_rate": 2.0927424224127525e-05, "loss": 0.4779, "step": 4815, "task_loss": 0.10625360906124115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21451416611671448, "epoch": 4.07, "learning_rate": 2.0921386306001692e-05, "loss": 0.2617, "step": 4816, "task_loss": 0.22488166391849518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3581251800060272, "epoch": 4.07, "learning_rate": 2.091534838787586e-05, "loss": 0.4808, "step": 4817, "task_loss": 0.9248092174530029 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4243791699409485, "epoch": 4.07, "learning_rate": 2.0909310469750033e-05, "loss": 0.3637, "step": 4818, "task_loss": 0.528826117515564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30868425965309143, "epoch": 4.07, "learning_rate": 2.09032725516242e-05, "loss": 0.318, "step": 4819, "task_loss": 0.23750707507133484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32443898916244507, "epoch": 4.07, "learning_rate": 2.089723463349837e-05, "loss": 0.3605, "step": 4820, "task_loss": 0.4629310667514801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4955759644508362, "epoch": 4.08, "learning_rate": 2.089119671537254e-05, "loss": 0.4603, "step": 4821, "task_loss": 0.7476202249526978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3321705758571625, "epoch": 4.08, "learning_rate": 2.088515879724671e-05, "loss": 0.4839, "step": 4822, "task_loss": 0.3478686511516571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4271596074104309, "epoch": 4.08, "learning_rate": 2.0879120879120882e-05, "loss": 0.3589, "step": 4823, "task_loss": 0.7135337591171265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4859582781791687, "epoch": 4.08, "learning_rate": 2.087308296099505e-05, "loss": 0.411, "step": 4824, "task_loss": 0.33073899149894714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3680473566055298, "epoch": 4.08, "learning_rate": 2.086704504286922e-05, "loss": 0.4209, "step": 4825, "task_loss": 0.6410704255104065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3757510483264923, "epoch": 4.08, "learning_rate": 2.086100712474339e-05, "loss": 0.3016, "step": 4826, "task_loss": 0.45014017820358276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23426391184329987, "epoch": 4.08, "learning_rate": 2.0854969206617558e-05, "loss": 0.3259, "step": 4827, "task_loss": 0.39988625049591064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45218318700790405, "epoch": 4.08, "learning_rate": 2.084893128849173e-05, "loss": 0.5358, "step": 4828, "task_loss": 0.6302747130393982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2615887224674225, "epoch": 4.08, "learning_rate": 2.08428933703659e-05, "loss": 0.3279, "step": 4829, "task_loss": 0.49998223781585693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3179033100605011, "epoch": 4.08, "learning_rate": 2.083685545224007e-05, "loss": 0.4419, "step": 4830, "task_loss": 0.7518953680992126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28417420387268066, "epoch": 4.08, "learning_rate": 2.083081753411424e-05, "loss": 0.3392, "step": 4831, "task_loss": 0.5532025694847107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2857249081134796, "epoch": 4.08, "learning_rate": 2.0824779615988407e-05, "loss": 0.3644, "step": 4832, "task_loss": 0.8376716375350952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3993995785713196, "epoch": 4.09, "learning_rate": 2.0818741697862578e-05, "loss": 0.5034, "step": 4833, "task_loss": 1.3450846672058105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4340713620185852, "epoch": 4.09, "learning_rate": 2.081270377973675e-05, "loss": 0.4114, "step": 4834, "task_loss": 0.7252369523048401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3039090037345886, "epoch": 4.09, "learning_rate": 2.080666586161092e-05, "loss": 0.3027, "step": 4835, "task_loss": 0.6101184487342834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4472924470901489, "epoch": 4.09, "learning_rate": 2.0800627943485086e-05, "loss": 0.411, "step": 4836, "task_loss": 1.2235015630722046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16689123213291168, "epoch": 4.09, "learning_rate": 2.0794590025359257e-05, "loss": 0.3865, "step": 4837, "task_loss": 0.5912393927574158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5674487352371216, "epoch": 4.09, "learning_rate": 2.0788552107233427e-05, "loss": 0.519, "step": 4838, "task_loss": 1.7052254676818848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40056851506233215, "epoch": 4.09, "learning_rate": 2.0782514189107598e-05, "loss": 0.3694, "step": 4839, "task_loss": 0.7205249071121216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5237020254135132, "epoch": 4.09, "learning_rate": 2.077647627098177e-05, "loss": 0.4186, "step": 4840, "task_loss": 0.5764660835266113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32728704810142517, "epoch": 4.09, "learning_rate": 2.0770438352855936e-05, "loss": 0.3779, "step": 4841, "task_loss": 0.6713085174560547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2944203019142151, "epoch": 4.09, "learning_rate": 2.0764400434730106e-05, "loss": 0.4001, "step": 4842, "task_loss": 0.57020503282547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.418878436088562, "epoch": 4.09, "learning_rate": 2.0758362516604277e-05, "loss": 0.3452, "step": 4843, "task_loss": 0.9546658396720886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4616398215293884, "epoch": 4.09, "learning_rate": 2.0752324598478444e-05, "loss": 0.4218, "step": 4844, "task_loss": 0.5722482800483704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4850846230983734, "epoch": 4.1, "learning_rate": 2.0746286680352618e-05, "loss": 0.3811, "step": 4845, "task_loss": 1.0110291242599487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34967297315597534, "epoch": 4.1, "learning_rate": 2.0740248762226785e-05, "loss": 0.587, "step": 4846, "task_loss": 0.664111316204071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5917301177978516, "epoch": 4.1, "learning_rate": 2.0734210844100956e-05, "loss": 0.3657, "step": 4847, "task_loss": 0.41411492228507996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32668349146842957, "epoch": 4.1, "learning_rate": 2.0728172925975126e-05, "loss": 0.3724, "step": 4848, "task_loss": 0.8458393812179565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.521426796913147, "epoch": 4.1, "learning_rate": 2.0722135007849293e-05, "loss": 0.5104, "step": 4849, "task_loss": 0.5829276442527771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42082488536834717, "epoch": 4.1, "learning_rate": 2.0716097089723464e-05, "loss": 0.4293, "step": 4850, "task_loss": 0.4657161235809326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1888958215713501, "epoch": 4.1, "learning_rate": 2.0710059171597635e-05, "loss": 0.3187, "step": 4851, "task_loss": 0.2297447770833969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3440100848674774, "epoch": 4.1, "learning_rate": 2.0704021253471802e-05, "loss": 0.3942, "step": 4852, "task_loss": 0.6344026327133179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2798870801925659, "epoch": 4.1, "learning_rate": 2.0697983335345976e-05, "loss": 0.3437, "step": 4853, "task_loss": 0.22810159623622894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3601232171058655, "epoch": 4.1, "learning_rate": 2.0691945417220143e-05, "loss": 0.3669, "step": 4854, "task_loss": 0.17488285899162292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31357139348983765, "epoch": 4.1, "learning_rate": 2.0685907499094314e-05, "loss": 0.4608, "step": 4855, "task_loss": 0.3602818548679352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3271420896053314, "epoch": 4.1, "learning_rate": 2.0679869580968484e-05, "loss": 0.3738, "step": 4856, "task_loss": 0.3499238193035126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6173704862594604, "epoch": 4.11, "learning_rate": 2.067383166284265e-05, "loss": 0.4749, "step": 4857, "task_loss": 0.6513952016830444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5608258843421936, "epoch": 4.11, "learning_rate": 2.0667793744716822e-05, "loss": 0.3775, "step": 4858, "task_loss": 1.1337559223175049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4021575152873993, "epoch": 4.11, "learning_rate": 2.0661755826590992e-05, "loss": 0.3306, "step": 4859, "task_loss": 0.3628753125667572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47452399134635925, "epoch": 4.11, "learning_rate": 2.065571790846516e-05, "loss": 0.4153, "step": 4860, "task_loss": 0.21621990203857422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41651684045791626, "epoch": 4.11, "learning_rate": 2.0649679990339334e-05, "loss": 0.3693, "step": 4861, "task_loss": 1.165889024734497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41685914993286133, "epoch": 4.11, "learning_rate": 2.06436420722135e-05, "loss": 0.4289, "step": 4862, "task_loss": 0.9819204807281494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4243483543395996, "epoch": 4.11, "learning_rate": 2.063760415408767e-05, "loss": 0.3987, "step": 4863, "task_loss": 1.3510444164276123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3103218972682953, "epoch": 4.11, "learning_rate": 2.0631566235961842e-05, "loss": 0.3778, "step": 4864, "task_loss": 0.7915773391723633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4768316149711609, "epoch": 4.11, "learning_rate": 2.062552831783601e-05, "loss": 0.4117, "step": 4865, "task_loss": 0.17231325805187225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31415629386901855, "epoch": 4.11, "learning_rate": 2.061949039971018e-05, "loss": 0.3708, "step": 4866, "task_loss": 0.6041181087493896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.517045795917511, "epoch": 4.11, "learning_rate": 2.061345248158435e-05, "loss": 0.3953, "step": 4867, "task_loss": 1.076674222946167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5355465412139893, "epoch": 4.11, "learning_rate": 2.060741456345852e-05, "loss": 0.4997, "step": 4868, "task_loss": 1.6766235828399658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.11306866258382797, "epoch": 4.12, "learning_rate": 2.060137664533269e-05, "loss": 0.3419, "step": 4869, "task_loss": 0.06433319300413132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47317779064178467, "epoch": 4.12, "learning_rate": 2.059533872720686e-05, "loss": 0.4624, "step": 4870, "task_loss": 0.8093212246894836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4197344481945038, "epoch": 4.12, "learning_rate": 2.058930080908103e-05, "loss": 0.4159, "step": 4871, "task_loss": 0.816380500793457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32472991943359375, "epoch": 4.12, "learning_rate": 2.05832628909552e-05, "loss": 0.4288, "step": 4872, "task_loss": 0.89973384141922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3972876965999603, "epoch": 4.12, "learning_rate": 2.057722497282937e-05, "loss": 0.3279, "step": 4873, "task_loss": 0.4747660160064697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26815587282180786, "epoch": 4.12, "learning_rate": 2.0571187054703537e-05, "loss": 0.3206, "step": 4874, "task_loss": 0.6931321024894714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31218889355659485, "epoch": 4.12, "learning_rate": 2.0565149136577708e-05, "loss": 0.425, "step": 4875, "task_loss": 0.254214882850647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4120326638221741, "epoch": 4.12, "learning_rate": 2.055911121845188e-05, "loss": 0.3673, "step": 4876, "task_loss": 0.3249792158603668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41802603006362915, "epoch": 4.12, "learning_rate": 2.055307330032605e-05, "loss": 0.4066, "step": 4877, "task_loss": 0.9161026477813721 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3289290964603424, "epoch": 4.12, "learning_rate": 2.054703538220022e-05, "loss": 0.3748, "step": 4878, "task_loss": 0.4613529443740845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2438494712114334, "epoch": 4.12, "learning_rate": 2.0540997464074387e-05, "loss": 0.4593, "step": 4879, "task_loss": 0.26984691619873047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3655092716217041, "epoch": 4.13, "learning_rate": 2.0534959545948557e-05, "loss": 0.4344, "step": 4880, "task_loss": 0.22452418506145477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3004671335220337, "epoch": 4.13, "learning_rate": 2.0528921627822728e-05, "loss": 0.288, "step": 4881, "task_loss": 0.10655982792377472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45654165744781494, "epoch": 4.13, "learning_rate": 2.0522883709696895e-05, "loss": 0.5326, "step": 4882, "task_loss": 1.1333647966384888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3612760603427887, "epoch": 4.13, "learning_rate": 2.051684579157107e-05, "loss": 0.4161, "step": 4883, "task_loss": 1.208998441696167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3970102369785309, "epoch": 4.13, "learning_rate": 2.0510807873445236e-05, "loss": 0.472, "step": 4884, "task_loss": 0.6679598689079285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40225136280059814, "epoch": 4.13, "learning_rate": 2.0504769955319407e-05, "loss": 0.3424, "step": 4885, "task_loss": 0.09269295632839203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24116365611553192, "epoch": 4.13, "learning_rate": 2.0498732037193578e-05, "loss": 0.4268, "step": 4886, "task_loss": 0.1385694444179535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29240623116493225, "epoch": 4.13, "learning_rate": 2.0492694119067745e-05, "loss": 0.2519, "step": 4887, "task_loss": 0.15054063498973846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4206714630126953, "epoch": 4.13, "learning_rate": 2.048665620094192e-05, "loss": 0.4277, "step": 4888, "task_loss": 0.785014271736145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30322080850601196, "epoch": 4.13, "learning_rate": 2.0480618282816086e-05, "loss": 0.3963, "step": 4889, "task_loss": 0.09553965926170349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4152767062187195, "epoch": 4.13, "learning_rate": 2.0474580364690253e-05, "loss": 0.441, "step": 4890, "task_loss": 1.0953460931777954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30176910758018494, "epoch": 4.13, "learning_rate": 2.0468542446564427e-05, "loss": 0.4106, "step": 4891, "task_loss": 0.6238240599632263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3066827952861786, "epoch": 4.14, "learning_rate": 2.0462504528438594e-05, "loss": 0.3864, "step": 4892, "task_loss": 0.6079174876213074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5566063523292542, "epoch": 4.14, "learning_rate": 2.0456466610312765e-05, "loss": 0.522, "step": 4893, "task_loss": 1.0442373752593994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.572799563407898, "epoch": 4.14, "learning_rate": 2.0450428692186935e-05, "loss": 0.4516, "step": 4894, "task_loss": 0.5820687413215637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27627015113830566, "epoch": 4.14, "learning_rate": 2.0444390774061102e-05, "loss": 0.3933, "step": 4895, "task_loss": 0.5689317584037781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4576801061630249, "epoch": 4.14, "learning_rate": 2.0438352855935276e-05, "loss": 0.3216, "step": 4896, "task_loss": 0.9261101484298706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4437587261199951, "epoch": 4.14, "learning_rate": 2.0432314937809444e-05, "loss": 0.4837, "step": 4897, "task_loss": 1.3549948930740356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.562465250492096, "epoch": 4.14, "learning_rate": 2.0426277019683614e-05, "loss": 0.5256, "step": 4898, "task_loss": 0.9140065312385559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3004215955734253, "epoch": 4.14, "learning_rate": 2.0420239101557785e-05, "loss": 0.3223, "step": 4899, "task_loss": 0.1704394668340683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37946778535842896, "epoch": 4.14, "learning_rate": 2.0414201183431952e-05, "loss": 0.4495, "step": 4900, "task_loss": 0.5033672451972961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.567090630531311, "epoch": 4.14, "learning_rate": 2.0408163265306123e-05, "loss": 0.4371, "step": 4901, "task_loss": 0.8256871700286865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20959050953388214, "epoch": 4.14, "learning_rate": 2.0402125347180293e-05, "loss": 0.3813, "step": 4902, "task_loss": 0.7078710198402405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20147162675857544, "epoch": 4.14, "learning_rate": 2.0396087429054464e-05, "loss": 0.3633, "step": 4903, "task_loss": 0.36383500695228577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1696646809577942, "epoch": 4.15, "learning_rate": 2.0390049510928634e-05, "loss": 0.2605, "step": 4904, "task_loss": 0.026303105056285858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21794937551021576, "epoch": 4.15, "learning_rate": 2.03840115928028e-05, "loss": 0.2796, "step": 4905, "task_loss": 0.22251349687576294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3813096582889557, "epoch": 4.15, "learning_rate": 2.0377973674676972e-05, "loss": 0.494, "step": 4906, "task_loss": 0.45608580112457275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35572385787963867, "epoch": 4.15, "learning_rate": 2.0371935756551143e-05, "loss": 0.4173, "step": 4907, "task_loss": 0.5525251030921936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3964511752128601, "epoch": 4.15, "learning_rate": 2.0365897838425313e-05, "loss": 0.3725, "step": 4908, "task_loss": 0.14927008748054504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28523704409599304, "epoch": 4.15, "learning_rate": 2.035985992029948e-05, "loss": 0.445, "step": 4909, "task_loss": 0.40992385149002075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27880197763442993, "epoch": 4.15, "learning_rate": 2.035382200217365e-05, "loss": 0.3504, "step": 4910, "task_loss": 0.47442206740379333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37250158190727234, "epoch": 4.15, "learning_rate": 2.034778408404782e-05, "loss": 0.3537, "step": 4911, "task_loss": 0.9203653931617737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2691155970096588, "epoch": 4.15, "learning_rate": 2.0341746165921992e-05, "loss": 0.3444, "step": 4912, "task_loss": 0.6142327785491943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3256071209907532, "epoch": 4.15, "learning_rate": 2.0335708247796163e-05, "loss": 0.4453, "step": 4913, "task_loss": 0.3778332471847534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2930718660354614, "epoch": 4.15, "learning_rate": 2.032967032967033e-05, "loss": 0.411, "step": 4914, "task_loss": 0.5430536270141602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2754436731338501, "epoch": 4.15, "learning_rate": 2.03236324115445e-05, "loss": 0.2937, "step": 4915, "task_loss": 0.08764053136110306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2939983904361725, "epoch": 4.16, "learning_rate": 2.031759449341867e-05, "loss": 0.4087, "step": 4916, "task_loss": 0.4664520025253296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30161425471305847, "epoch": 4.16, "learning_rate": 2.0311556575292838e-05, "loss": 0.3531, "step": 4917, "task_loss": 0.8165481090545654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42050307989120483, "epoch": 4.16, "learning_rate": 2.0305518657167012e-05, "loss": 0.4429, "step": 4918, "task_loss": 0.8496973514556885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4591634273529053, "epoch": 4.16, "learning_rate": 2.029948073904118e-05, "loss": 0.4133, "step": 4919, "task_loss": 0.4675963222980499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2775533199310303, "epoch": 4.16, "learning_rate": 2.029344282091535e-05, "loss": 0.3616, "step": 4920, "task_loss": 0.4133470058441162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43146079778671265, "epoch": 4.16, "learning_rate": 2.028740490278952e-05, "loss": 0.3982, "step": 4921, "task_loss": 0.49137082695961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2605120539665222, "epoch": 4.16, "learning_rate": 2.0281366984663688e-05, "loss": 0.4637, "step": 4922, "task_loss": 0.6774657368659973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3654075264930725, "epoch": 4.16, "learning_rate": 2.0275329066537858e-05, "loss": 0.4124, "step": 4923, "task_loss": 0.15728804469108582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3170989155769348, "epoch": 4.16, "learning_rate": 2.026929114841203e-05, "loss": 0.3383, "step": 4924, "task_loss": 0.58356773853302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3012944757938385, "epoch": 4.16, "learning_rate": 2.0263253230286196e-05, "loss": 0.3238, "step": 4925, "task_loss": 0.21051791310310364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23221299052238464, "epoch": 4.16, "learning_rate": 2.025721531216037e-05, "loss": 0.3061, "step": 4926, "task_loss": 0.47761842608451843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19469064474105835, "epoch": 4.16, "learning_rate": 2.0251177394034537e-05, "loss": 0.3966, "step": 4927, "task_loss": 0.39869463443756104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30066609382629395, "epoch": 4.17, "learning_rate": 2.0245139475908708e-05, "loss": 0.3007, "step": 4928, "task_loss": 0.1384783387184143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3878682851791382, "epoch": 4.17, "learning_rate": 2.0239101557782878e-05, "loss": 0.3598, "step": 4929, "task_loss": 0.6870420575141907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32719042897224426, "epoch": 4.17, "learning_rate": 2.0233063639657045e-05, "loss": 0.3379, "step": 4930, "task_loss": 0.18080797791481018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34560877084732056, "epoch": 4.17, "learning_rate": 2.0227025721531216e-05, "loss": 0.4396, "step": 4931, "task_loss": 1.1543644666671753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4853559136390686, "epoch": 4.17, "learning_rate": 2.0220987803405387e-05, "loss": 0.4588, "step": 4932, "task_loss": 0.8782967329025269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2241063117980957, "epoch": 4.17, "learning_rate": 2.0214949885279557e-05, "loss": 0.3553, "step": 4933, "task_loss": 0.8076143860816956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16157348453998566, "epoch": 4.17, "learning_rate": 2.0208911967153728e-05, "loss": 0.4801, "step": 4934, "task_loss": 0.042991310358047485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3595232367515564, "epoch": 4.17, "learning_rate": 2.0202874049027895e-05, "loss": 0.3698, "step": 4935, "task_loss": 0.7689218521118164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14912250638008118, "epoch": 4.17, "learning_rate": 2.0196836130902065e-05, "loss": 0.2739, "step": 4936, "task_loss": 0.04086870700120926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27355891466140747, "epoch": 4.17, "learning_rate": 2.0190798212776236e-05, "loss": 0.2745, "step": 4937, "task_loss": 0.8040363788604736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3140448033809662, "epoch": 4.17, "learning_rate": 2.0184760294650407e-05, "loss": 0.4341, "step": 4938, "task_loss": 0.7871727347373962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.316908597946167, "epoch": 4.17, "learning_rate": 2.0178722376524574e-05, "loss": 0.518, "step": 4939, "task_loss": 0.7179533839225769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4450612962245941, "epoch": 4.18, "learning_rate": 2.0172684458398744e-05, "loss": 0.4565, "step": 4940, "task_loss": 1.3333171606063843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2684367299079895, "epoch": 4.18, "learning_rate": 2.0166646540272915e-05, "loss": 0.2865, "step": 4941, "task_loss": 0.4282436966896057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2725856900215149, "epoch": 4.18, "learning_rate": 2.0160608622147085e-05, "loss": 0.2974, "step": 4942, "task_loss": 0.6378273963928223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3047941327095032, "epoch": 4.18, "learning_rate": 2.0154570704021256e-05, "loss": 0.3715, "step": 4943, "task_loss": 0.3798714876174927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3053954839706421, "epoch": 4.18, "learning_rate": 2.0148532785895423e-05, "loss": 0.3442, "step": 4944, "task_loss": 0.45613953471183777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.333272784948349, "epoch": 4.18, "learning_rate": 2.0142494867769594e-05, "loss": 0.2873, "step": 4945, "task_loss": 0.8703317046165466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22788889706134796, "epoch": 4.18, "learning_rate": 2.0136456949643764e-05, "loss": 0.4442, "step": 4946, "task_loss": 0.09991218894720078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38102981448173523, "epoch": 4.18, "learning_rate": 2.013041903151793e-05, "loss": 0.406, "step": 4947, "task_loss": 0.5017703175544739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4326510429382324, "epoch": 4.18, "learning_rate": 2.0124381113392105e-05, "loss": 0.382, "step": 4948, "task_loss": 0.4931629002094269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31172603368759155, "epoch": 4.18, "learning_rate": 2.0118343195266273e-05, "loss": 0.37, "step": 4949, "task_loss": 1.6177444458007812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5692975521087646, "epoch": 4.18, "learning_rate": 2.0112305277140443e-05, "loss": 0.376, "step": 4950, "task_loss": 0.6756212711334229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22614987194538116, "epoch": 4.19, "learning_rate": 2.0106267359014614e-05, "loss": 0.3959, "step": 4951, "task_loss": 0.09018303453922272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3275710940361023, "epoch": 4.19, "learning_rate": 2.010022944088878e-05, "loss": 0.3235, "step": 4952, "task_loss": 0.3188950717449188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4158264100551605, "epoch": 4.19, "learning_rate": 2.0094191522762955e-05, "loss": 0.4541, "step": 4953, "task_loss": 0.9696528911590576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3220638632774353, "epoch": 4.19, "learning_rate": 2.0088153604637122e-05, "loss": 0.3566, "step": 4954, "task_loss": 0.6239708662033081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3361540138721466, "epoch": 4.19, "learning_rate": 2.008211568651129e-05, "loss": 0.3323, "step": 4955, "task_loss": 0.2922322154045105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6681088209152222, "epoch": 4.19, "learning_rate": 2.0076077768385463e-05, "loss": 0.4533, "step": 4956, "task_loss": 0.6300988793373108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5283669233322144, "epoch": 4.19, "learning_rate": 2.007003985025963e-05, "loss": 0.3609, "step": 4957, "task_loss": 0.39297911524772644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.272205114364624, "epoch": 4.19, "learning_rate": 2.00640019321338e-05, "loss": 0.4084, "step": 4958, "task_loss": 0.32826662063598633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24138741195201874, "epoch": 4.19, "learning_rate": 2.005796401400797e-05, "loss": 0.3698, "step": 4959, "task_loss": 0.2210940718650818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3609721064567566, "epoch": 4.19, "learning_rate": 2.005192609588214e-05, "loss": 0.3244, "step": 4960, "task_loss": 0.32420748472213745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25469160079956055, "epoch": 4.19, "learning_rate": 2.0045888177756313e-05, "loss": 0.4062, "step": 4961, "task_loss": 1.6894043684005737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3518746793270111, "epoch": 4.19, "learning_rate": 2.003985025963048e-05, "loss": 0.4308, "step": 4962, "task_loss": 0.7561299800872803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27272337675094604, "epoch": 4.2, "learning_rate": 2.003381234150465e-05, "loss": 0.3964, "step": 4963, "task_loss": 0.4220966100692749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24055899679660797, "epoch": 4.2, "learning_rate": 2.002777442337882e-05, "loss": 0.422, "step": 4964, "task_loss": 0.5692592263221741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2984093725681305, "epoch": 4.2, "learning_rate": 2.0021736505252988e-05, "loss": 0.3685, "step": 4965, "task_loss": 0.2515117824077606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3494958281517029, "epoch": 4.2, "learning_rate": 2.001569858712716e-05, "loss": 0.3763, "step": 4966, "task_loss": 0.8490211963653564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3790994882583618, "epoch": 4.2, "learning_rate": 2.000966066900133e-05, "loss": 0.4413, "step": 4967, "task_loss": 0.85321444272995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24561502039432526, "epoch": 4.2, "learning_rate": 2.00036227508755e-05, "loss": 0.4198, "step": 4968, "task_loss": 0.4973517060279846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35247713327407837, "epoch": 4.2, "learning_rate": 1.999758483274967e-05, "loss": 0.4418, "step": 4969, "task_loss": 0.6519553661346436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6182500123977661, "epoch": 4.2, "learning_rate": 1.9991546914623838e-05, "loss": 0.5745, "step": 4970, "task_loss": 1.4613137245178223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25978586077690125, "epoch": 4.2, "learning_rate": 1.9985508996498008e-05, "loss": 0.3147, "step": 4971, "task_loss": 0.38996621966362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40596163272857666, "epoch": 4.2, "learning_rate": 1.997947107837218e-05, "loss": 0.4876, "step": 4972, "task_loss": 0.8661603927612305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5627560615539551, "epoch": 4.2, "learning_rate": 1.997343316024635e-05, "loss": 0.475, "step": 4973, "task_loss": 0.7330241203308105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45729589462280273, "epoch": 4.2, "learning_rate": 1.9967395242120517e-05, "loss": 0.3845, "step": 4974, "task_loss": 1.104781985282898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19327864050865173, "epoch": 4.21, "learning_rate": 1.9961357323994687e-05, "loss": 0.3426, "step": 4975, "task_loss": 0.5238791704177856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3826206624507904, "epoch": 4.21, "learning_rate": 1.9955319405868858e-05, "loss": 0.4117, "step": 4976, "task_loss": 0.47415733337402344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4795910120010376, "epoch": 4.21, "learning_rate": 1.994928148774303e-05, "loss": 0.4045, "step": 4977, "task_loss": 0.9912485480308533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.361281156539917, "epoch": 4.21, "learning_rate": 1.99432435696172e-05, "loss": 0.4207, "step": 4978, "task_loss": 0.11904170364141464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3799096941947937, "epoch": 4.21, "learning_rate": 1.9937205651491366e-05, "loss": 0.3514, "step": 4979, "task_loss": 0.6522350311279297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23251400887966156, "epoch": 4.21, "learning_rate": 1.9931167733365537e-05, "loss": 0.3851, "step": 4980, "task_loss": 0.4167139530181885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2434770166873932, "epoch": 4.21, "learning_rate": 1.9925129815239707e-05, "loss": 0.5161, "step": 4981, "task_loss": 0.5981890559196472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29288631677627563, "epoch": 4.21, "learning_rate": 1.9919091897113874e-05, "loss": 0.2516, "step": 4982, "task_loss": 0.7830543518066406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.322804719209671, "epoch": 4.21, "learning_rate": 1.991305397898805e-05, "loss": 0.4269, "step": 4983, "task_loss": 0.8535153865814209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5986583232879639, "epoch": 4.21, "learning_rate": 1.9907016060862216e-05, "loss": 0.588, "step": 4984, "task_loss": 1.39434015750885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38143980503082275, "epoch": 4.21, "learning_rate": 1.9900978142736386e-05, "loss": 0.3251, "step": 4985, "task_loss": 0.6008881330490112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3179433345794678, "epoch": 4.21, "learning_rate": 1.9894940224610557e-05, "loss": 0.4746, "step": 4986, "task_loss": 0.6979768872261047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22325649857521057, "epoch": 4.22, "learning_rate": 1.9888902306484724e-05, "loss": 0.3243, "step": 4987, "task_loss": 1.1241180896759033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5107250213623047, "epoch": 4.22, "learning_rate": 1.9882864388358894e-05, "loss": 0.3998, "step": 4988, "task_loss": 1.2687104940414429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26019155979156494, "epoch": 4.22, "learning_rate": 1.9876826470233065e-05, "loss": 0.383, "step": 4989, "task_loss": 0.6220192313194275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5887449979782104, "epoch": 4.22, "learning_rate": 1.9870788552107232e-05, "loss": 0.4077, "step": 4990, "task_loss": 1.4993582963943481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21535304188728333, "epoch": 4.22, "learning_rate": 1.9864750633981406e-05, "loss": 0.3068, "step": 4991, "task_loss": 0.09163609147071838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33346810936927795, "epoch": 4.22, "learning_rate": 1.9858712715855573e-05, "loss": 0.43, "step": 4992, "task_loss": 0.595499575138092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21814799308776855, "epoch": 4.22, "learning_rate": 1.9852674797729744e-05, "loss": 0.367, "step": 4993, "task_loss": 0.3706550598144531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2747762203216553, "epoch": 4.22, "learning_rate": 1.9846636879603914e-05, "loss": 0.3414, "step": 4994, "task_loss": 1.3551198244094849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30782783031463623, "epoch": 4.22, "learning_rate": 1.984059896147808e-05, "loss": 0.339, "step": 4995, "task_loss": 0.8920853734016418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25947052240371704, "epoch": 4.22, "learning_rate": 1.9834561043352252e-05, "loss": 0.3716, "step": 4996, "task_loss": 0.5010493397712708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47972801327705383, "epoch": 4.22, "learning_rate": 1.9828523125226423e-05, "loss": 0.5371, "step": 4997, "task_loss": 1.0532047748565674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29621759057044983, "epoch": 4.22, "learning_rate": 1.9822485207100593e-05, "loss": 0.4422, "step": 4998, "task_loss": 0.09287156164646149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3429713249206543, "epoch": 4.23, "learning_rate": 1.9816447288974764e-05, "loss": 0.4056, "step": 4999, "task_loss": 0.7150721549987793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.440720796585083, "epoch": 4.23, "learning_rate": 1.981040937084893e-05, "loss": 0.3717, "step": 5000, "task_loss": 0.6581095457077026 }, { "epoch": 4.23, "eval_accuracy": 0.9147326732673268, "eval_loss": 0.245942622423172, "eval_runtime": 316.1512, "eval_samples_per_second": 79.867, "eval_steps_per_second": 0.626, "step": 5000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.340566486120224, "epoch": 4.23, "learning_rate": 1.9804371452723102e-05, "loss": 0.3321, "step": 5001, "task_loss": 0.5915564298629761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2717490792274475, "epoch": 4.23, "learning_rate": 1.9798333534597272e-05, "loss": 0.4126, "step": 5002, "task_loss": 0.31342580914497375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.247637540102005, "epoch": 4.23, "learning_rate": 1.9792295616471443e-05, "loss": 0.3771, "step": 5003, "task_loss": 0.39172741770744324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4416036009788513, "epoch": 4.23, "learning_rate": 1.978625769834561e-05, "loss": 0.4501, "step": 5004, "task_loss": 0.5157111883163452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2934147119522095, "epoch": 4.23, "learning_rate": 1.978021978021978e-05, "loss": 0.2895, "step": 5005, "task_loss": 0.4045398533344269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30605167150497437, "epoch": 4.23, "learning_rate": 1.977418186209395e-05, "loss": 0.5037, "step": 5006, "task_loss": 0.21912696957588196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3859097957611084, "epoch": 4.23, "learning_rate": 1.9768143943968122e-05, "loss": 0.3296, "step": 5007, "task_loss": 0.6950794458389282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33782273530960083, "epoch": 4.23, "learning_rate": 1.9762106025842292e-05, "loss": 0.472, "step": 5008, "task_loss": 0.06545872241258621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2946425974369049, "epoch": 4.23, "learning_rate": 1.975606810771646e-05, "loss": 0.35, "step": 5009, "task_loss": 0.18558675050735474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7137224674224854, "epoch": 4.23, "learning_rate": 1.975003018959063e-05, "loss": 0.4595, "step": 5010, "task_loss": 0.962865948677063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43251627683639526, "epoch": 4.24, "learning_rate": 1.97439922714648e-05, "loss": 0.3397, "step": 5011, "task_loss": 0.44594594836235046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2200215607881546, "epoch": 4.24, "learning_rate": 1.9737954353338968e-05, "loss": 0.31, "step": 5012, "task_loss": 0.7106783986091614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21724560856819153, "epoch": 4.24, "learning_rate": 1.9731916435213142e-05, "loss": 0.2933, "step": 5013, "task_loss": 0.6119659543037415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2910676598548889, "epoch": 4.24, "learning_rate": 1.972587851708731e-05, "loss": 0.3921, "step": 5014, "task_loss": 0.26283782720565796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4141484200954437, "epoch": 4.24, "learning_rate": 1.971984059896148e-05, "loss": 0.3989, "step": 5015, "task_loss": 0.4120986759662628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2969052791595459, "epoch": 4.24, "learning_rate": 1.971380268083565e-05, "loss": 0.3702, "step": 5016, "task_loss": 0.4616381824016571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2236185371875763, "epoch": 4.24, "learning_rate": 1.9707764762709817e-05, "loss": 0.4127, "step": 5017, "task_loss": 0.954069197177887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.419619619846344, "epoch": 4.24, "learning_rate": 1.970172684458399e-05, "loss": 0.3906, "step": 5018, "task_loss": 0.28915297985076904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4436971843242645, "epoch": 4.24, "learning_rate": 1.969568892645816e-05, "loss": 0.4338, "step": 5019, "task_loss": 1.0837959051132202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37543410062789917, "epoch": 4.24, "learning_rate": 1.9689651008332326e-05, "loss": 0.2883, "step": 5020, "task_loss": 0.35486888885498047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28143510222435, "epoch": 4.24, "learning_rate": 1.96836130902065e-05, "loss": 0.32, "step": 5021, "task_loss": 0.32861846685409546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3473047614097595, "epoch": 4.24, "learning_rate": 1.9677575172080667e-05, "loss": 0.3396, "step": 5022, "task_loss": 1.2079094648361206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6049901247024536, "epoch": 4.25, "learning_rate": 1.9671537253954837e-05, "loss": 0.3557, "step": 5023, "task_loss": 0.565191924571991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21333351731300354, "epoch": 4.25, "learning_rate": 1.9665499335829008e-05, "loss": 0.3712, "step": 5024, "task_loss": 0.30474066734313965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5061161518096924, "epoch": 4.25, "learning_rate": 1.9659461417703175e-05, "loss": 0.4253, "step": 5025, "task_loss": 0.5530281662940979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3119223713874817, "epoch": 4.25, "learning_rate": 1.965342349957735e-05, "loss": 0.3981, "step": 5026, "task_loss": 0.31619369983673096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.390781968832016, "epoch": 4.25, "learning_rate": 1.9647385581451516e-05, "loss": 0.4894, "step": 5027, "task_loss": 0.1291448473930359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5418936014175415, "epoch": 4.25, "learning_rate": 1.9641347663325683e-05, "loss": 0.3936, "step": 5028, "task_loss": 0.7358436584472656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39260581135749817, "epoch": 4.25, "learning_rate": 1.9635309745199857e-05, "loss": 0.3494, "step": 5029, "task_loss": 1.178260087966919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.301521360874176, "epoch": 4.25, "learning_rate": 1.9629271827074025e-05, "loss": 0.3288, "step": 5030, "task_loss": 0.5117861032485962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4330814778804779, "epoch": 4.25, "learning_rate": 1.9623233908948195e-05, "loss": 0.4126, "step": 5031, "task_loss": 0.9685688614845276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4248940348625183, "epoch": 4.25, "learning_rate": 1.9617195990822366e-05, "loss": 0.4261, "step": 5032, "task_loss": 0.18653899431228638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4033006727695465, "epoch": 4.25, "learning_rate": 1.9611158072696533e-05, "loss": 0.3257, "step": 5033, "task_loss": 0.7322436571121216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43314462900161743, "epoch": 4.26, "learning_rate": 1.9605120154570707e-05, "loss": 0.4114, "step": 5034, "task_loss": 0.6786592602729797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1949365735054016, "epoch": 4.26, "learning_rate": 1.9599082236444874e-05, "loss": 0.3234, "step": 5035, "task_loss": 0.3937545716762543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3681657016277313, "epoch": 4.26, "learning_rate": 1.9593044318319045e-05, "loss": 0.3848, "step": 5036, "task_loss": 0.7844576835632324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8633118271827698, "epoch": 4.26, "learning_rate": 1.9587006400193215e-05, "loss": 0.6575, "step": 5037, "task_loss": 0.2978464961051941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3306819200515747, "epoch": 4.26, "learning_rate": 1.9580968482067382e-05, "loss": 0.394, "step": 5038, "task_loss": 0.40381795167922974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3909815847873688, "epoch": 4.26, "learning_rate": 1.9574930563941553e-05, "loss": 0.4123, "step": 5039, "task_loss": 0.5056546926498413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3018808364868164, "epoch": 4.26, "learning_rate": 1.9568892645815723e-05, "loss": 0.3384, "step": 5040, "task_loss": 0.37927794456481934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49077558517456055, "epoch": 4.26, "learning_rate": 1.9562854727689894e-05, "loss": 0.2827, "step": 5041, "task_loss": 0.36419281363487244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3683588206768036, "epoch": 4.26, "learning_rate": 1.9556816809564065e-05, "loss": 0.3443, "step": 5042, "task_loss": 0.4446830451488495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16218256950378418, "epoch": 4.26, "learning_rate": 1.9550778891438232e-05, "loss": 0.2785, "step": 5043, "task_loss": 0.455960214138031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5839213132858276, "epoch": 4.26, "learning_rate": 1.9544740973312402e-05, "loss": 0.4655, "step": 5044, "task_loss": 0.21621882915496826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4224521517753601, "epoch": 4.26, "learning_rate": 1.9538703055186573e-05, "loss": 0.3453, "step": 5045, "task_loss": 0.3061593770980835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47634050250053406, "epoch": 4.27, "learning_rate": 1.9532665137060744e-05, "loss": 0.3488, "step": 5046, "task_loss": 0.5383303761482239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24038687348365784, "epoch": 4.27, "learning_rate": 1.952662721893491e-05, "loss": 0.4426, "step": 5047, "task_loss": 0.2843947410583496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3637498915195465, "epoch": 4.27, "learning_rate": 1.952058930080908e-05, "loss": 0.4535, "step": 5048, "task_loss": 0.4632522165775299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47995683550834656, "epoch": 4.27, "learning_rate": 1.9514551382683252e-05, "loss": 0.3994, "step": 5049, "task_loss": 0.5567610263824463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27976924180984497, "epoch": 4.27, "learning_rate": 1.9508513464557422e-05, "loss": 0.2595, "step": 5050, "task_loss": 0.5924615263938904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2878987491130829, "epoch": 4.27, "learning_rate": 1.9502475546431593e-05, "loss": 0.5001, "step": 5051, "task_loss": 0.4725993275642395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36713501811027527, "epoch": 4.27, "learning_rate": 1.949643762830576e-05, "loss": 0.4048, "step": 5052, "task_loss": 0.47621262073516846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40720686316490173, "epoch": 4.27, "learning_rate": 1.949039971017993e-05, "loss": 0.369, "step": 5053, "task_loss": 0.7771027088165283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4262758195400238, "epoch": 4.27, "learning_rate": 1.94843617920541e-05, "loss": 0.3743, "step": 5054, "task_loss": 1.2666001319885254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42212334275245667, "epoch": 4.27, "learning_rate": 1.947832387392827e-05, "loss": 0.4399, "step": 5055, "task_loss": 0.39557012915611267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6823728084564209, "epoch": 4.27, "learning_rate": 1.9472285955802442e-05, "loss": 0.4466, "step": 5056, "task_loss": 1.407310962677002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 1.0583356618881226, "epoch": 4.27, "learning_rate": 1.946624803767661e-05, "loss": 0.5467, "step": 5057, "task_loss": 1.5133870840072632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19262272119522095, "epoch": 4.28, "learning_rate": 1.946021011955078e-05, "loss": 0.3882, "step": 5058, "task_loss": 0.15524205565452576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40412086248397827, "epoch": 4.28, "learning_rate": 1.945417220142495e-05, "loss": 0.5847, "step": 5059, "task_loss": 0.42263004183769226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5090640783309937, "epoch": 4.28, "learning_rate": 1.9448134283299118e-05, "loss": 0.4157, "step": 5060, "task_loss": 0.5036785006523132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3324001431465149, "epoch": 4.28, "learning_rate": 1.944209636517329e-05, "loss": 0.4342, "step": 5061, "task_loss": 0.4077487289905548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1906145066022873, "epoch": 4.28, "learning_rate": 1.943605844704746e-05, "loss": 0.3346, "step": 5062, "task_loss": 0.895012617111206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7065595388412476, "epoch": 4.28, "learning_rate": 1.9430020528921626e-05, "loss": 0.6542, "step": 5063, "task_loss": 0.49127668142318726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5543975830078125, "epoch": 4.28, "learning_rate": 1.94239826107958e-05, "loss": 0.5596, "step": 5064, "task_loss": 0.3633556663990021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36903470754623413, "epoch": 4.28, "learning_rate": 1.9417944692669967e-05, "loss": 0.3482, "step": 5065, "task_loss": 0.3825729191303253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4103553891181946, "epoch": 4.28, "learning_rate": 1.9411906774544138e-05, "loss": 0.4988, "step": 5066, "task_loss": 0.5289757251739502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4191024601459503, "epoch": 4.28, "learning_rate": 1.940586885641831e-05, "loss": 0.3957, "step": 5067, "task_loss": 0.13650432229042053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.394170343875885, "epoch": 4.28, "learning_rate": 1.9399830938292476e-05, "loss": 0.3929, "step": 5068, "task_loss": 0.7876015901565552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22902610898017883, "epoch": 4.28, "learning_rate": 1.9393793020166646e-05, "loss": 0.3302, "step": 5069, "task_loss": 0.6722349524497986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42762449383735657, "epoch": 4.29, "learning_rate": 1.9387755102040817e-05, "loss": 0.4204, "step": 5070, "task_loss": 1.4104900360107422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22987636923789978, "epoch": 4.29, "learning_rate": 1.9381717183914987e-05, "loss": 0.3454, "step": 5071, "task_loss": 0.026751097291707993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5178745985031128, "epoch": 4.29, "learning_rate": 1.9375679265789158e-05, "loss": 0.4159, "step": 5072, "task_loss": 0.6209869384765625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40792879462242126, "epoch": 4.29, "learning_rate": 1.9369641347663325e-05, "loss": 0.6328, "step": 5073, "task_loss": 0.7229844927787781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.485085129737854, "epoch": 4.29, "learning_rate": 1.9363603429537496e-05, "loss": 0.3944, "step": 5074, "task_loss": 0.4511586129665375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.396629273891449, "epoch": 4.29, "learning_rate": 1.9357565511411666e-05, "loss": 0.3983, "step": 5075, "task_loss": 0.7665345072746277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25677409768104553, "epoch": 4.29, "learning_rate": 1.9351527593285837e-05, "loss": 0.3041, "step": 5076, "task_loss": 0.06041169911623001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49682533740997314, "epoch": 4.29, "learning_rate": 1.9345489675160004e-05, "loss": 0.5031, "step": 5077, "task_loss": 0.5118041634559631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24874132871627808, "epoch": 4.29, "learning_rate": 1.9339451757034175e-05, "loss": 0.2958, "step": 5078, "task_loss": 0.5976605415344238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6261084079742432, "epoch": 4.29, "learning_rate": 1.9333413838908345e-05, "loss": 0.4582, "step": 5079, "task_loss": 0.8701868653297424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2672179341316223, "epoch": 4.29, "learning_rate": 1.9327375920782516e-05, "loss": 0.2574, "step": 5080, "task_loss": 0.4312443137168884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4501747488975525, "epoch": 4.29, "learning_rate": 1.9321338002656686e-05, "loss": 0.3716, "step": 5081, "task_loss": 0.7131360769271851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15496492385864258, "epoch": 4.3, "learning_rate": 1.9315300084530854e-05, "loss": 0.2756, "step": 5082, "task_loss": 0.28123173117637634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23929083347320557, "epoch": 4.3, "learning_rate": 1.9309262166405024e-05, "loss": 0.4072, "step": 5083, "task_loss": 0.5074484944343567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21296487748622894, "epoch": 4.3, "learning_rate": 1.9303224248279195e-05, "loss": 0.3014, "step": 5084, "task_loss": 0.5815112590789795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.12501360476016998, "epoch": 4.3, "learning_rate": 1.9297186330153362e-05, "loss": 0.3036, "step": 5085, "task_loss": 0.008761283941566944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6004858613014221, "epoch": 4.3, "learning_rate": 1.9291148412027536e-05, "loss": 0.3705, "step": 5086, "task_loss": 0.5043809413909912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4914315938949585, "epoch": 4.3, "learning_rate": 1.9285110493901703e-05, "loss": 0.3853, "step": 5087, "task_loss": 0.7369866967201233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19145721197128296, "epoch": 4.3, "learning_rate": 1.9279072575775874e-05, "loss": 0.3217, "step": 5088, "task_loss": 0.6003506779670715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30889278650283813, "epoch": 4.3, "learning_rate": 1.9273034657650044e-05, "loss": 0.4155, "step": 5089, "task_loss": 0.6112839579582214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2443179339170456, "epoch": 4.3, "learning_rate": 1.926699673952421e-05, "loss": 0.3106, "step": 5090, "task_loss": 0.727619469165802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3374579846858978, "epoch": 4.3, "learning_rate": 1.9260958821398385e-05, "loss": 0.385, "step": 5091, "task_loss": 0.7082294225692749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33123183250427246, "epoch": 4.3, "learning_rate": 1.9254920903272553e-05, "loss": 0.4598, "step": 5092, "task_loss": 0.267940491437912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5871395468711853, "epoch": 4.3, "learning_rate": 1.924888298514672e-05, "loss": 0.4493, "step": 5093, "task_loss": 1.3309595584869385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38785362243652344, "epoch": 4.31, "learning_rate": 1.9242845067020894e-05, "loss": 0.3537, "step": 5094, "task_loss": 0.6004380583763123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2825233042240143, "epoch": 4.31, "learning_rate": 1.923680714889506e-05, "loss": 0.3908, "step": 5095, "task_loss": 0.4677309989929199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2843528091907501, "epoch": 4.31, "learning_rate": 1.923076923076923e-05, "loss": 0.3712, "step": 5096, "task_loss": 0.8638784885406494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36929455399513245, "epoch": 4.31, "learning_rate": 1.9224731312643402e-05, "loss": 0.3277, "step": 5097, "task_loss": 0.31668558716773987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49612295627593994, "epoch": 4.31, "learning_rate": 1.921869339451757e-05, "loss": 0.4586, "step": 5098, "task_loss": 1.148868203163147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18416611850261688, "epoch": 4.31, "learning_rate": 1.9212655476391743e-05, "loss": 0.3413, "step": 5099, "task_loss": 0.8668200969696045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30473828315734863, "epoch": 4.31, "learning_rate": 1.920661755826591e-05, "loss": 0.3354, "step": 5100, "task_loss": 0.8354716300964355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4049234986305237, "epoch": 4.31, "learning_rate": 1.920057964014008e-05, "loss": 0.4142, "step": 5101, "task_loss": 0.8979512453079224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3573823571205139, "epoch": 4.31, "learning_rate": 1.919454172201425e-05, "loss": 0.2897, "step": 5102, "task_loss": 0.16238698363304138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33506107330322266, "epoch": 4.31, "learning_rate": 1.918850380388842e-05, "loss": 0.4823, "step": 5103, "task_loss": 1.5611389875411987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18141862750053406, "epoch": 4.31, "learning_rate": 1.918246588576259e-05, "loss": 0.351, "step": 5104, "task_loss": 0.6245715618133545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5728260278701782, "epoch": 4.32, "learning_rate": 1.917642796763676e-05, "loss": 0.3862, "step": 5105, "task_loss": 1.032341718673706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45187968015670776, "epoch": 4.32, "learning_rate": 1.917039004951093e-05, "loss": 0.41, "step": 5106, "task_loss": 1.7320561408996582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5374857187271118, "epoch": 4.32, "learning_rate": 1.91643521313851e-05, "loss": 0.4193, "step": 5107, "task_loss": 0.7176459431648254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2429855465888977, "epoch": 4.32, "learning_rate": 1.9158314213259268e-05, "loss": 0.331, "step": 5108, "task_loss": 0.7569623589515686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47397279739379883, "epoch": 4.32, "learning_rate": 1.915227629513344e-05, "loss": 0.3697, "step": 5109, "task_loss": 0.392328679561615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5569284558296204, "epoch": 4.32, "learning_rate": 1.914623837700761e-05, "loss": 0.4623, "step": 5110, "task_loss": 0.7623511552810669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5459192991256714, "epoch": 4.32, "learning_rate": 1.914020045888178e-05, "loss": 0.4417, "step": 5111, "task_loss": 1.313636064529419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29612499475479126, "epoch": 4.32, "learning_rate": 1.9134162540755947e-05, "loss": 0.2981, "step": 5112, "task_loss": 0.5576233863830566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5133373737335205, "epoch": 4.32, "learning_rate": 1.9128124622630118e-05, "loss": 0.3613, "step": 5113, "task_loss": 0.750019371509552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30919888615608215, "epoch": 4.32, "learning_rate": 1.9122086704504288e-05, "loss": 0.3946, "step": 5114, "task_loss": 0.6036627292633057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4389476180076599, "epoch": 4.32, "learning_rate": 1.911604878637846e-05, "loss": 0.3931, "step": 5115, "task_loss": 0.3081081807613373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35269099473953247, "epoch": 4.32, "learning_rate": 1.911001086825263e-05, "loss": 0.417, "step": 5116, "task_loss": 0.5935120582580566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22424887120723724, "epoch": 4.33, "learning_rate": 1.9103972950126796e-05, "loss": 0.3257, "step": 5117, "task_loss": 0.38606470823287964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2651144564151764, "epoch": 4.33, "learning_rate": 1.9097935032000967e-05, "loss": 0.319, "step": 5118, "task_loss": 0.25840041041374207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3069764971733093, "epoch": 4.33, "learning_rate": 1.9091897113875138e-05, "loss": 0.418, "step": 5119, "task_loss": 0.19716845452785492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42766180634498596, "epoch": 4.33, "learning_rate": 1.9085859195749305e-05, "loss": 0.3762, "step": 5120, "task_loss": 0.557982325553894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3473864197731018, "epoch": 4.33, "learning_rate": 1.907982127762348e-05, "loss": 0.397, "step": 5121, "task_loss": 0.6292860507965088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34075966477394104, "epoch": 4.33, "learning_rate": 1.9073783359497646e-05, "loss": 0.3931, "step": 5122, "task_loss": 1.2440478801727295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6627556681632996, "epoch": 4.33, "learning_rate": 1.9067745441371817e-05, "loss": 0.4277, "step": 5123, "task_loss": 0.9518446922302246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4261639714241028, "epoch": 4.33, "learning_rate": 1.9061707523245987e-05, "loss": 0.3296, "step": 5124, "task_loss": 0.5353721976280212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30596649646759033, "epoch": 4.33, "learning_rate": 1.9055669605120154e-05, "loss": 0.2909, "step": 5125, "task_loss": 0.8765067458152771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.314362496137619, "epoch": 4.33, "learning_rate": 1.9049631686994325e-05, "loss": 0.3672, "step": 5126, "task_loss": 1.2612433433532715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29488134384155273, "epoch": 4.33, "learning_rate": 1.9043593768868495e-05, "loss": 0.4296, "step": 5127, "task_loss": 0.4817865192890167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.238134503364563, "epoch": 4.33, "learning_rate": 1.9037555850742663e-05, "loss": 0.3922, "step": 5128, "task_loss": 1.1704683303833008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7072365880012512, "epoch": 4.34, "learning_rate": 1.9031517932616837e-05, "loss": 0.4555, "step": 5129, "task_loss": 0.8034501075744629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20204365253448486, "epoch": 4.34, "learning_rate": 1.9025480014491004e-05, "loss": 0.3201, "step": 5130, "task_loss": 0.5520434975624084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3306760787963867, "epoch": 4.34, "learning_rate": 1.9019442096365174e-05, "loss": 0.3612, "step": 5131, "task_loss": 0.8900482654571533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.482852578163147, "epoch": 4.34, "learning_rate": 1.9013404178239345e-05, "loss": 0.3972, "step": 5132, "task_loss": 0.9095231294631958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.277945876121521, "epoch": 4.34, "learning_rate": 1.9007366260113512e-05, "loss": 0.3572, "step": 5133, "task_loss": 0.7801541090011597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2794971168041229, "epoch": 4.34, "learning_rate": 1.9001328341987683e-05, "loss": 0.3988, "step": 5134, "task_loss": 0.47030243277549744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2649248540401459, "epoch": 4.34, "learning_rate": 1.8995290423861853e-05, "loss": 0.3968, "step": 5135, "task_loss": 0.32408836483955383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16953937709331512, "epoch": 4.34, "learning_rate": 1.8989252505736024e-05, "loss": 0.2901, "step": 5136, "task_loss": 0.06328903138637543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40590041875839233, "epoch": 4.34, "learning_rate": 1.8983214587610194e-05, "loss": 0.3821, "step": 5137, "task_loss": 0.4570622444152832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4749447703361511, "epoch": 4.34, "learning_rate": 1.897717666948436e-05, "loss": 0.4299, "step": 5138, "task_loss": 0.47292235493659973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3772923946380615, "epoch": 4.34, "learning_rate": 1.8971138751358532e-05, "loss": 0.4152, "step": 5139, "task_loss": 0.29141324758529663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2538264989852905, "epoch": 4.34, "learning_rate": 1.8965100833232703e-05, "loss": 0.2615, "step": 5140, "task_loss": 0.23338404297828674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4799971282482147, "epoch": 4.35, "learning_rate": 1.8959062915106873e-05, "loss": 0.4474, "step": 5141, "task_loss": 0.4372880756855011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4274338483810425, "epoch": 4.35, "learning_rate": 1.895302499698104e-05, "loss": 0.3817, "step": 5142, "task_loss": 0.8300930261611938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4415701627731323, "epoch": 4.35, "learning_rate": 1.894698707885521e-05, "loss": 0.3966, "step": 5143, "task_loss": 0.5089291930198669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1606616973876953, "epoch": 4.35, "learning_rate": 1.894094916072938e-05, "loss": 0.2961, "step": 5144, "task_loss": 0.022366778925061226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2842117249965668, "epoch": 4.35, "learning_rate": 1.8934911242603552e-05, "loss": 0.369, "step": 5145, "task_loss": 0.7027658224105835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42632681131362915, "epoch": 4.35, "learning_rate": 1.8928873324477723e-05, "loss": 0.377, "step": 5146, "task_loss": 0.36064040660858154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1894669234752655, "epoch": 4.35, "learning_rate": 1.892283540635189e-05, "loss": 0.3138, "step": 5147, "task_loss": 0.17319050431251526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37579578161239624, "epoch": 4.35, "learning_rate": 1.891679748822606e-05, "loss": 0.4479, "step": 5148, "task_loss": 0.2923598289489746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5457533597946167, "epoch": 4.35, "learning_rate": 1.891075957010023e-05, "loss": 0.6133, "step": 5149, "task_loss": 0.5107803344726562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31974631547927856, "epoch": 4.35, "learning_rate": 1.8904721651974398e-05, "loss": 0.4367, "step": 5150, "task_loss": 0.8776116371154785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33956989645957947, "epoch": 4.35, "learning_rate": 1.8898683733848572e-05, "loss": 0.3488, "step": 5151, "task_loss": 0.6608722805976868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5406298637390137, "epoch": 4.35, "learning_rate": 1.889264581572274e-05, "loss": 0.507, "step": 5152, "task_loss": 1.1904761791229248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5056542158126831, "epoch": 4.36, "learning_rate": 1.888660789759691e-05, "loss": 0.3798, "step": 5153, "task_loss": 0.9114444255828857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3523990511894226, "epoch": 4.36, "learning_rate": 1.888056997947108e-05, "loss": 0.3807, "step": 5154, "task_loss": 1.1293889284133911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4642389118671417, "epoch": 4.36, "learning_rate": 1.8874532061345248e-05, "loss": 0.4254, "step": 5155, "task_loss": 0.32422202825546265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19578048586845398, "epoch": 4.36, "learning_rate": 1.886849414321942e-05, "loss": 0.3849, "step": 5156, "task_loss": 1.056933045387268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48466283082962036, "epoch": 4.36, "learning_rate": 1.886245622509359e-05, "loss": 0.4443, "step": 5157, "task_loss": 1.1221474409103394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23744215071201324, "epoch": 4.36, "learning_rate": 1.8856418306967756e-05, "loss": 0.3251, "step": 5158, "task_loss": 0.5109015107154846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21997155249118805, "epoch": 4.36, "learning_rate": 1.885038038884193e-05, "loss": 0.3444, "step": 5159, "task_loss": 0.27203935384750366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14578457176685333, "epoch": 4.36, "learning_rate": 1.8844342470716097e-05, "loss": 0.422, "step": 5160, "task_loss": 0.4031499922275543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44084280729293823, "epoch": 4.36, "learning_rate": 1.8838304552590268e-05, "loss": 0.4335, "step": 5161, "task_loss": 0.5768921375274658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20555126667022705, "epoch": 4.36, "learning_rate": 1.8832266634464438e-05, "loss": 0.3363, "step": 5162, "task_loss": 0.06851212680339813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20696000754833221, "epoch": 4.36, "learning_rate": 1.8826228716338605e-05, "loss": 0.3414, "step": 5163, "task_loss": 0.16921471059322357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43298831582069397, "epoch": 4.36, "learning_rate": 1.882019079821278e-05, "loss": 0.5087, "step": 5164, "task_loss": 0.8197500109672546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32425442337989807, "epoch": 4.37, "learning_rate": 1.8814152880086947e-05, "loss": 0.3671, "step": 5165, "task_loss": 0.9149870872497559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44063785672187805, "epoch": 4.37, "learning_rate": 1.8808114961961117e-05, "loss": 0.3914, "step": 5166, "task_loss": 1.4080874919891357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48493722081184387, "epoch": 4.37, "learning_rate": 1.8802077043835288e-05, "loss": 0.4602, "step": 5167, "task_loss": 1.0483784675598145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3543283939361572, "epoch": 4.37, "learning_rate": 1.8796039125709455e-05, "loss": 0.3981, "step": 5168, "task_loss": 0.44391322135925293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4185851216316223, "epoch": 4.37, "learning_rate": 1.8790001207583626e-05, "loss": 0.3582, "step": 5169, "task_loss": 0.5528239011764526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38399577140808105, "epoch": 4.37, "learning_rate": 1.8783963289457796e-05, "loss": 0.3393, "step": 5170, "task_loss": 0.42995867133140564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26206111907958984, "epoch": 4.37, "learning_rate": 1.8777925371331967e-05, "loss": 0.517, "step": 5171, "task_loss": 0.9646754860877991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31453847885131836, "epoch": 4.37, "learning_rate": 1.8771887453206137e-05, "loss": 0.272, "step": 5172, "task_loss": 0.49069830775260925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6315782070159912, "epoch": 4.37, "learning_rate": 1.8765849535080304e-05, "loss": 0.4116, "step": 5173, "task_loss": 0.3364013731479645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21864697337150574, "epoch": 4.37, "learning_rate": 1.8759811616954475e-05, "loss": 0.3854, "step": 5174, "task_loss": 0.12269914150238037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4038107693195343, "epoch": 4.37, "learning_rate": 1.8753773698828646e-05, "loss": 0.3485, "step": 5175, "task_loss": 0.9827893972396851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32384249567985535, "epoch": 4.38, "learning_rate": 1.8747735780702816e-05, "loss": 0.3311, "step": 5176, "task_loss": 1.3772859573364258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5343233346939087, "epoch": 4.38, "learning_rate": 1.8741697862576983e-05, "loss": 0.5031, "step": 5177, "task_loss": 0.19899597764015198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4851059913635254, "epoch": 4.38, "learning_rate": 1.8735659944451154e-05, "loss": 0.3908, "step": 5178, "task_loss": 0.6490619778633118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40302425622940063, "epoch": 4.38, "learning_rate": 1.8729622026325324e-05, "loss": 0.4094, "step": 5179, "task_loss": 0.6757632493972778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22897475957870483, "epoch": 4.38, "learning_rate": 1.8723584108199495e-05, "loss": 0.3924, "step": 5180, "task_loss": 0.7136711478233337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4588503837585449, "epoch": 4.38, "learning_rate": 1.8717546190073666e-05, "loss": 0.3711, "step": 5181, "task_loss": 0.38395601511001587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3370903730392456, "epoch": 4.38, "learning_rate": 1.8711508271947833e-05, "loss": 0.453, "step": 5182, "task_loss": 0.9747654795646667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4333541989326477, "epoch": 4.38, "learning_rate": 1.8705470353822003e-05, "loss": 0.4794, "step": 5183, "task_loss": 0.8821219801902771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5052077174186707, "epoch": 4.38, "learning_rate": 1.8699432435696174e-05, "loss": 0.4747, "step": 5184, "task_loss": 0.6203513145446777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28383660316467285, "epoch": 4.38, "learning_rate": 1.869339451757034e-05, "loss": 0.3096, "step": 5185, "task_loss": 0.2119908481836319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5348623394966125, "epoch": 4.38, "learning_rate": 1.8687356599444515e-05, "loss": 0.3413, "step": 5186, "task_loss": 0.4476745128631592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25258657336235046, "epoch": 4.38, "learning_rate": 1.8681318681318682e-05, "loss": 0.3054, "step": 5187, "task_loss": 0.7500442266464233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34689319133758545, "epoch": 4.39, "learning_rate": 1.8675280763192853e-05, "loss": 0.3565, "step": 5188, "task_loss": 0.5860693454742432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5495946407318115, "epoch": 4.39, "learning_rate": 1.8669242845067023e-05, "loss": 0.3746, "step": 5189, "task_loss": 0.1435060203075409 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37231361865997314, "epoch": 4.39, "learning_rate": 1.866320492694119e-05, "loss": 0.4531, "step": 5190, "task_loss": 0.9019960761070251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17058825492858887, "epoch": 4.39, "learning_rate": 1.865716700881536e-05, "loss": 0.2894, "step": 5191, "task_loss": 0.1386742740869522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3873181939125061, "epoch": 4.39, "learning_rate": 1.8651129090689532e-05, "loss": 0.407, "step": 5192, "task_loss": 0.9682409763336182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26013413071632385, "epoch": 4.39, "learning_rate": 1.86450911725637e-05, "loss": 0.3113, "step": 5193, "task_loss": 0.6219673156738281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3314706087112427, "epoch": 4.39, "learning_rate": 1.8639053254437873e-05, "loss": 0.3382, "step": 5194, "task_loss": 0.6744981408119202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2784534692764282, "epoch": 4.39, "learning_rate": 1.863301533631204e-05, "loss": 0.3058, "step": 5195, "task_loss": 0.06798100471496582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48460912704467773, "epoch": 4.39, "learning_rate": 1.862697741818621e-05, "loss": 0.4409, "step": 5196, "task_loss": 0.11434780061244965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5388981103897095, "epoch": 4.39, "learning_rate": 1.862093950006038e-05, "loss": 0.3167, "step": 5197, "task_loss": 0.3456410765647888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3394423723220825, "epoch": 4.39, "learning_rate": 1.861490158193455e-05, "loss": 0.4723, "step": 5198, "task_loss": 0.6163389682769775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3243570029735565, "epoch": 4.39, "learning_rate": 1.860886366380872e-05, "loss": 0.4224, "step": 5199, "task_loss": 1.5306345224380493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24484072625637054, "epoch": 4.4, "learning_rate": 1.860282574568289e-05, "loss": 0.3267, "step": 5200, "task_loss": 0.5008615851402283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24701336026191711, "epoch": 4.4, "learning_rate": 1.8596787827557057e-05, "loss": 0.3913, "step": 5201, "task_loss": 1.0068578720092773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3314456343650818, "epoch": 4.4, "learning_rate": 1.859074990943123e-05, "loss": 0.3597, "step": 5202, "task_loss": 0.5203906297683716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21819564700126648, "epoch": 4.4, "learning_rate": 1.8584711991305398e-05, "loss": 0.3285, "step": 5203, "task_loss": 0.7640011310577393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5535624027252197, "epoch": 4.4, "learning_rate": 1.857867407317957e-05, "loss": 0.4744, "step": 5204, "task_loss": 0.7002655267715454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3908206522464752, "epoch": 4.4, "learning_rate": 1.857263615505374e-05, "loss": 0.3658, "step": 5205, "task_loss": 0.8256690502166748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17135754227638245, "epoch": 4.4, "learning_rate": 1.8566598236927906e-05, "loss": 0.414, "step": 5206, "task_loss": 0.2395799458026886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44447070360183716, "epoch": 4.4, "learning_rate": 1.8560560318802077e-05, "loss": 0.3928, "step": 5207, "task_loss": 0.5143520832061768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24747595191001892, "epoch": 4.4, "learning_rate": 1.8554522400676247e-05, "loss": 0.3387, "step": 5208, "task_loss": 0.7363437414169312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5200941562652588, "epoch": 4.4, "learning_rate": 1.8548484482550418e-05, "loss": 0.3876, "step": 5209, "task_loss": 0.7225891947746277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4488714337348938, "epoch": 4.4, "learning_rate": 1.854244656442459e-05, "loss": 0.4833, "step": 5210, "task_loss": 0.838161051273346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3297135829925537, "epoch": 4.4, "learning_rate": 1.8536408646298756e-05, "loss": 0.337, "step": 5211, "task_loss": 0.24547216296195984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2602151930332184, "epoch": 4.41, "learning_rate": 1.8530370728172926e-05, "loss": 0.3275, "step": 5212, "task_loss": 1.121696949005127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3172169029712677, "epoch": 4.41, "learning_rate": 1.8524332810047097e-05, "loss": 0.4225, "step": 5213, "task_loss": 0.5589432120323181 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23712073266506195, "epoch": 4.41, "learning_rate": 1.8518294891921267e-05, "loss": 0.3118, "step": 5214, "task_loss": 0.33803677558898926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31289544701576233, "epoch": 4.41, "learning_rate": 1.8512256973795435e-05, "loss": 0.3755, "step": 5215, "task_loss": 0.6297552585601807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4040418267250061, "epoch": 4.41, "learning_rate": 1.8506219055669605e-05, "loss": 0.3987, "step": 5216, "task_loss": 1.2184408903121948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4308493137359619, "epoch": 4.41, "learning_rate": 1.8500181137543776e-05, "loss": 0.4065, "step": 5217, "task_loss": 2.1417481899261475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33825576305389404, "epoch": 4.41, "learning_rate": 1.8494143219417946e-05, "loss": 0.4688, "step": 5218, "task_loss": 0.5528981685638428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36832231283187866, "epoch": 4.41, "learning_rate": 1.8488105301292117e-05, "loss": 0.4622, "step": 5219, "task_loss": 0.5385500192642212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19545269012451172, "epoch": 4.41, "learning_rate": 1.8482067383166284e-05, "loss": 0.3389, "step": 5220, "task_loss": 0.4970904588699341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32087868452072144, "epoch": 4.41, "learning_rate": 1.8476029465040455e-05, "loss": 0.2827, "step": 5221, "task_loss": 1.0092705488204956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45641690492630005, "epoch": 4.41, "learning_rate": 1.8469991546914625e-05, "loss": 0.3983, "step": 5222, "task_loss": 0.3643428683280945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2684268057346344, "epoch": 4.41, "learning_rate": 1.8463953628788792e-05, "loss": 0.513, "step": 5223, "task_loss": 1.0245673656463623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42989689111709595, "epoch": 4.42, "learning_rate": 1.8457915710662966e-05, "loss": 0.4323, "step": 5224, "task_loss": 0.6919485330581665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7307767868041992, "epoch": 4.42, "learning_rate": 1.8451877792537133e-05, "loss": 0.4076, "step": 5225, "task_loss": 0.15413440763950348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47299283742904663, "epoch": 4.42, "learning_rate": 1.8445839874411304e-05, "loss": 0.3954, "step": 5226, "task_loss": 0.6069705486297607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5587657690048218, "epoch": 4.42, "learning_rate": 1.8439801956285475e-05, "loss": 0.3824, "step": 5227, "task_loss": 0.5959059596061707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3794844448566437, "epoch": 4.42, "learning_rate": 1.8433764038159642e-05, "loss": 0.4353, "step": 5228, "task_loss": 0.6033614873886108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19884192943572998, "epoch": 4.42, "learning_rate": 1.8427726120033816e-05, "loss": 0.3243, "step": 5229, "task_loss": 0.33393174409866333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44313690066337585, "epoch": 4.42, "learning_rate": 1.8421688201907983e-05, "loss": 0.4385, "step": 5230, "task_loss": 0.47637709975242615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3105534315109253, "epoch": 4.42, "learning_rate": 1.841565028378215e-05, "loss": 0.483, "step": 5231, "task_loss": 0.9140769839286804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3152158260345459, "epoch": 4.42, "learning_rate": 1.8409612365656324e-05, "loss": 0.3171, "step": 5232, "task_loss": 0.395473837852478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31655555963516235, "epoch": 4.42, "learning_rate": 1.840357444753049e-05, "loss": 0.3829, "step": 5233, "task_loss": 1.0136085748672485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43258965015411377, "epoch": 4.42, "learning_rate": 1.8397536529404662e-05, "loss": 0.4059, "step": 5234, "task_loss": 0.5866233110427856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2276366949081421, "epoch": 4.42, "learning_rate": 1.8391498611278832e-05, "loss": 0.3409, "step": 5235, "task_loss": 0.8556416630744934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5262447595596313, "epoch": 4.43, "learning_rate": 1.8385460693153e-05, "loss": 0.4606, "step": 5236, "task_loss": 1.2295241355895996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36971622705459595, "epoch": 4.43, "learning_rate": 1.8379422775027174e-05, "loss": 0.3765, "step": 5237, "task_loss": 0.13858358561992645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3056156635284424, "epoch": 4.43, "learning_rate": 1.837338485690134e-05, "loss": 0.4406, "step": 5238, "task_loss": 0.6817382574081421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4593098759651184, "epoch": 4.43, "learning_rate": 1.836734693877551e-05, "loss": 0.4732, "step": 5239, "task_loss": 0.6961773633956909 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1813804656267166, "epoch": 4.43, "learning_rate": 1.8361309020649682e-05, "loss": 0.3137, "step": 5240, "task_loss": 0.5781649351119995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4542540907859802, "epoch": 4.43, "learning_rate": 1.835527110252385e-05, "loss": 0.4847, "step": 5241, "task_loss": 0.9892956614494324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44765907526016235, "epoch": 4.43, "learning_rate": 1.834923318439802e-05, "loss": 0.3522, "step": 5242, "task_loss": 0.41574469208717346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2585853338241577, "epoch": 4.43, "learning_rate": 1.834319526627219e-05, "loss": 0.4923, "step": 5243, "task_loss": 0.05067348852753639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3100784420967102, "epoch": 4.43, "learning_rate": 1.833715734814636e-05, "loss": 0.3582, "step": 5244, "task_loss": 1.1714766025543213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38940513134002686, "epoch": 4.43, "learning_rate": 1.833111943002053e-05, "loss": 0.4343, "step": 5245, "task_loss": 0.43075788021087646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27685123682022095, "epoch": 4.43, "learning_rate": 1.83250815118947e-05, "loss": 0.3321, "step": 5246, "task_loss": 0.8146538734436035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4948709011077881, "epoch": 4.44, "learning_rate": 1.831904359376887e-05, "loss": 0.3481, "step": 5247, "task_loss": 0.6484357714653015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3459925949573517, "epoch": 4.44, "learning_rate": 1.831300567564304e-05, "loss": 0.3474, "step": 5248, "task_loss": 0.20509374141693115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37156400084495544, "epoch": 4.44, "learning_rate": 1.830696775751721e-05, "loss": 0.4054, "step": 5249, "task_loss": 0.7029798626899719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5563976764678955, "epoch": 4.44, "learning_rate": 1.8300929839391377e-05, "loss": 0.4384, "step": 5250, "task_loss": 0.30441462993621826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39709222316741943, "epoch": 4.44, "learning_rate": 1.8294891921265548e-05, "loss": 0.3227, "step": 5251, "task_loss": 0.26614049077033997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4133065938949585, "epoch": 4.44, "learning_rate": 1.828885400313972e-05, "loss": 0.4607, "step": 5252, "task_loss": 1.0160808563232422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2712053060531616, "epoch": 4.44, "learning_rate": 1.828281608501389e-05, "loss": 0.3414, "step": 5253, "task_loss": 0.8016608953475952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2910124957561493, "epoch": 4.44, "learning_rate": 1.827677816688806e-05, "loss": 0.4402, "step": 5254, "task_loss": 0.562380850315094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4146055579185486, "epoch": 4.44, "learning_rate": 1.8270740248762227e-05, "loss": 0.3678, "step": 5255, "task_loss": 1.0739461183547974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.12908151745796204, "epoch": 4.44, "learning_rate": 1.8264702330636397e-05, "loss": 0.3336, "step": 5256, "task_loss": 0.24716438353061676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28849077224731445, "epoch": 4.44, "learning_rate": 1.8258664412510568e-05, "loss": 0.3994, "step": 5257, "task_loss": 0.29690155386924744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31948238611221313, "epoch": 4.44, "learning_rate": 1.8252626494384735e-05, "loss": 0.2818, "step": 5258, "task_loss": 0.7516115307807922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35735929012298584, "epoch": 4.45, "learning_rate": 1.824658857625891e-05, "loss": 0.3704, "step": 5259, "task_loss": 0.6015775203704834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5161853432655334, "epoch": 4.45, "learning_rate": 1.8240550658133076e-05, "loss": 0.358, "step": 5260, "task_loss": 0.14585445821285248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5641673803329468, "epoch": 4.45, "learning_rate": 1.8234512740007244e-05, "loss": 0.4938, "step": 5261, "task_loss": 0.6914381384849548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2951531708240509, "epoch": 4.45, "learning_rate": 1.8228474821881417e-05, "loss": 0.4731, "step": 5262, "task_loss": 0.12317049503326416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.12014356255531311, "epoch": 4.45, "learning_rate": 1.8222436903755585e-05, "loss": 0.3368, "step": 5263, "task_loss": 0.3046962320804596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21899142861366272, "epoch": 4.45, "learning_rate": 1.8216398985629755e-05, "loss": 0.3273, "step": 5264, "task_loss": 0.3404634892940521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4435215890407562, "epoch": 4.45, "learning_rate": 1.8210361067503926e-05, "loss": 0.3096, "step": 5265, "task_loss": 0.40148040652275085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6356015205383301, "epoch": 4.45, "learning_rate": 1.8204323149378093e-05, "loss": 0.5718, "step": 5266, "task_loss": 1.2403342723846436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34512200951576233, "epoch": 4.45, "learning_rate": 1.8198285231252267e-05, "loss": 0.3644, "step": 5267, "task_loss": 1.7708070278167725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6186854839324951, "epoch": 4.45, "learning_rate": 1.8192247313126434e-05, "loss": 0.369, "step": 5268, "task_loss": 0.39630216360092163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3231382966041565, "epoch": 4.45, "learning_rate": 1.8186209395000605e-05, "loss": 0.3226, "step": 5269, "task_loss": 0.2564380466938019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5450466871261597, "epoch": 4.45, "learning_rate": 1.8180171476874775e-05, "loss": 0.3782, "step": 5270, "task_loss": 1.2790533304214478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3480446934700012, "epoch": 4.46, "learning_rate": 1.8174133558748942e-05, "loss": 0.3434, "step": 5271, "task_loss": 0.3185195028781891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32575035095214844, "epoch": 4.46, "learning_rate": 1.8168095640623113e-05, "loss": 0.3397, "step": 5272, "task_loss": 0.604731559753418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5215228199958801, "epoch": 4.46, "learning_rate": 1.8162057722497284e-05, "loss": 0.3826, "step": 5273, "task_loss": 0.48162564635276794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3289278745651245, "epoch": 4.46, "learning_rate": 1.8156019804371454e-05, "loss": 0.4171, "step": 5274, "task_loss": 0.27593308687210083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28610560297966003, "epoch": 4.46, "learning_rate": 1.8149981886245625e-05, "loss": 0.2985, "step": 5275, "task_loss": 0.44557493925094604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38473042845726013, "epoch": 4.46, "learning_rate": 1.8143943968119792e-05, "loss": 0.449, "step": 5276, "task_loss": 1.0577466487884521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3246760368347168, "epoch": 4.46, "learning_rate": 1.8137906049993963e-05, "loss": 0.3186, "step": 5277, "task_loss": 0.6398707628250122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17848613858222961, "epoch": 4.46, "learning_rate": 1.8131868131868133e-05, "loss": 0.3149, "step": 5278, "task_loss": 0.08701655268669128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43216097354888916, "epoch": 4.46, "learning_rate": 1.8125830213742304e-05, "loss": 0.3297, "step": 5279, "task_loss": 0.8069826364517212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4437862038612366, "epoch": 4.46, "learning_rate": 1.811979229561647e-05, "loss": 0.4245, "step": 5280, "task_loss": 0.9651049971580505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.274081826210022, "epoch": 4.46, "learning_rate": 1.811375437749064e-05, "loss": 0.3768, "step": 5281, "task_loss": 0.4732531011104584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5638551712036133, "epoch": 4.46, "learning_rate": 1.8107716459364812e-05, "loss": 0.3765, "step": 5282, "task_loss": 1.3967581987380981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34827864170074463, "epoch": 4.47, "learning_rate": 1.8101678541238983e-05, "loss": 0.3588, "step": 5283, "task_loss": 0.8862443566322327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32658180594444275, "epoch": 4.47, "learning_rate": 1.8095640623113153e-05, "loss": 0.4338, "step": 5284, "task_loss": 0.47399216890335083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.251056432723999, "epoch": 4.47, "learning_rate": 1.808960270498732e-05, "loss": 0.4035, "step": 5285, "task_loss": 0.12106680870056152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30151110887527466, "epoch": 4.47, "learning_rate": 1.808356478686149e-05, "loss": 0.408, "step": 5286, "task_loss": 0.6294260025024414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37527674436569214, "epoch": 4.47, "learning_rate": 1.807752686873566e-05, "loss": 0.3516, "step": 5287, "task_loss": 0.33889007568359375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25799840688705444, "epoch": 4.47, "learning_rate": 1.807148895060983e-05, "loss": 0.3325, "step": 5288, "task_loss": 0.6227168440818787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24380311369895935, "epoch": 4.47, "learning_rate": 1.8065451032484003e-05, "loss": 0.3068, "step": 5289, "task_loss": 0.5342955589294434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4528445601463318, "epoch": 4.47, "learning_rate": 1.805941311435817e-05, "loss": 0.4836, "step": 5290, "task_loss": 0.15673843026161194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43372344970703125, "epoch": 4.47, "learning_rate": 1.805337519623234e-05, "loss": 0.4272, "step": 5291, "task_loss": 0.3752705156803131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45831921696662903, "epoch": 4.47, "learning_rate": 1.804733727810651e-05, "loss": 0.3568, "step": 5292, "task_loss": 0.2801412343978882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3460584580898285, "epoch": 4.47, "learning_rate": 1.8041299359980678e-05, "loss": 0.3816, "step": 5293, "task_loss": 0.2997775375843048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3055708706378937, "epoch": 4.47, "learning_rate": 1.8035261441854852e-05, "loss": 0.3606, "step": 5294, "task_loss": 0.4287426471710205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31929993629455566, "epoch": 4.48, "learning_rate": 1.802922352372902e-05, "loss": 0.3397, "step": 5295, "task_loss": 0.814228892326355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21768741309642792, "epoch": 4.48, "learning_rate": 1.8023185605603186e-05, "loss": 0.3299, "step": 5296, "task_loss": 1.053403377532959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32588931918144226, "epoch": 4.48, "learning_rate": 1.801714768747736e-05, "loss": 0.3083, "step": 5297, "task_loss": 0.313795268535614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5382324457168579, "epoch": 4.48, "learning_rate": 1.8011109769351528e-05, "loss": 0.5048, "step": 5298, "task_loss": 0.8978071212768555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6751061677932739, "epoch": 4.48, "learning_rate": 1.8005071851225698e-05, "loss": 0.4469, "step": 5299, "task_loss": 0.5377448797225952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19967341423034668, "epoch": 4.48, "learning_rate": 1.799903393309987e-05, "loss": 0.3384, "step": 5300, "task_loss": 0.34261590242385864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5783663392066956, "epoch": 4.48, "learning_rate": 1.7992996014974036e-05, "loss": 0.4165, "step": 5301, "task_loss": 1.9989256858825684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2799167335033417, "epoch": 4.48, "learning_rate": 1.798695809684821e-05, "loss": 0.4053, "step": 5302, "task_loss": 0.3276396095752716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36858734488487244, "epoch": 4.48, "learning_rate": 1.7980920178722377e-05, "loss": 0.362, "step": 5303, "task_loss": 1.4640973806381226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7968460321426392, "epoch": 4.48, "learning_rate": 1.7974882260596548e-05, "loss": 0.7162, "step": 5304, "task_loss": 1.7535889148712158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29469823837280273, "epoch": 4.48, "learning_rate": 1.7968844342470718e-05, "loss": 0.4626, "step": 5305, "task_loss": 0.5049771070480347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21716442704200745, "epoch": 4.48, "learning_rate": 1.7962806424344885e-05, "loss": 0.2456, "step": 5306, "task_loss": 0.25472405552864075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40696483850479126, "epoch": 4.49, "learning_rate": 1.7956768506219056e-05, "loss": 0.436, "step": 5307, "task_loss": 0.47935688495635986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35697677731513977, "epoch": 4.49, "learning_rate": 1.7950730588093226e-05, "loss": 0.4416, "step": 5308, "task_loss": 0.5401471257209778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4117048978805542, "epoch": 4.49, "learning_rate": 1.7944692669967397e-05, "loss": 0.3442, "step": 5309, "task_loss": 1.3099416494369507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3824571371078491, "epoch": 4.49, "learning_rate": 1.7938654751841568e-05, "loss": 0.4181, "step": 5310, "task_loss": 1.3757015466690063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3536352217197418, "epoch": 4.49, "learning_rate": 1.7932616833715735e-05, "loss": 0.4711, "step": 5311, "task_loss": 0.6031944751739502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2576790750026703, "epoch": 4.49, "learning_rate": 1.7926578915589905e-05, "loss": 0.3906, "step": 5312, "task_loss": 0.36705759167671204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3616946041584015, "epoch": 4.49, "learning_rate": 1.7920540997464076e-05, "loss": 0.4349, "step": 5313, "task_loss": 0.45044851303100586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3789399266242981, "epoch": 4.49, "learning_rate": 1.7914503079338247e-05, "loss": 0.3453, "step": 5314, "task_loss": 0.8138585090637207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32471197843551636, "epoch": 4.49, "learning_rate": 1.7908465161212414e-05, "loss": 0.4174, "step": 5315, "task_loss": 0.0605563260614872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3985297679901123, "epoch": 4.49, "learning_rate": 1.7902427243086584e-05, "loss": 0.3454, "step": 5316, "task_loss": 0.555727481842041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1711488515138626, "epoch": 4.49, "learning_rate": 1.7896389324960755e-05, "loss": 0.3146, "step": 5317, "task_loss": 0.12062321603298187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4877442717552185, "epoch": 4.5, "learning_rate": 1.7890351406834922e-05, "loss": 0.364, "step": 5318, "task_loss": 0.24681709706783295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40454229712486267, "epoch": 4.5, "learning_rate": 1.7884313488709096e-05, "loss": 0.3853, "step": 5319, "task_loss": 0.773638904094696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.415262371301651, "epoch": 4.5, "learning_rate": 1.7878275570583263e-05, "loss": 0.4239, "step": 5320, "task_loss": 0.9999488592147827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4641116261482239, "epoch": 4.5, "learning_rate": 1.7872237652457434e-05, "loss": 0.4358, "step": 5321, "task_loss": 0.45925864577293396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2554561495780945, "epoch": 4.5, "learning_rate": 1.7866199734331604e-05, "loss": 0.2977, "step": 5322, "task_loss": 0.21244850754737854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2184750735759735, "epoch": 4.5, "learning_rate": 1.786016181620577e-05, "loss": 0.2901, "step": 5323, "task_loss": 0.0525239072740078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36078768968582153, "epoch": 4.5, "learning_rate": 1.7854123898079945e-05, "loss": 0.4059, "step": 5324, "task_loss": 0.11358994245529175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33003711700439453, "epoch": 4.5, "learning_rate": 1.7848085979954113e-05, "loss": 0.3828, "step": 5325, "task_loss": 0.33040279150009155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17648938298225403, "epoch": 4.5, "learning_rate": 1.784204806182828e-05, "loss": 0.4456, "step": 5326, "task_loss": 0.5753427147865295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4023174047470093, "epoch": 4.5, "learning_rate": 1.7836010143702454e-05, "loss": 0.3072, "step": 5327, "task_loss": 0.371571809053421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42026692628860474, "epoch": 4.5, "learning_rate": 1.782997222557662e-05, "loss": 0.3478, "step": 5328, "task_loss": 0.64959716796875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3119339942932129, "epoch": 4.5, "learning_rate": 1.782393430745079e-05, "loss": 0.3783, "step": 5329, "task_loss": 0.6669889688491821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21444310247898102, "epoch": 4.51, "learning_rate": 1.7817896389324962e-05, "loss": 0.3306, "step": 5330, "task_loss": 0.05500154569745064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45741474628448486, "epoch": 4.51, "learning_rate": 1.781185847119913e-05, "loss": 0.3356, "step": 5331, "task_loss": 1.036494255065918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2424495816230774, "epoch": 4.51, "learning_rate": 1.7805820553073303e-05, "loss": 0.3774, "step": 5332, "task_loss": 0.7866775393486023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2625657021999359, "epoch": 4.51, "learning_rate": 1.779978263494747e-05, "loss": 0.3694, "step": 5333, "task_loss": 0.20474475622177124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4337954819202423, "epoch": 4.51, "learning_rate": 1.779374471682164e-05, "loss": 0.5179, "step": 5334, "task_loss": 1.0557105541229248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6008363962173462, "epoch": 4.51, "learning_rate": 1.778770679869581e-05, "loss": 0.4984, "step": 5335, "task_loss": 1.1767793893814087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3729902505874634, "epoch": 4.51, "learning_rate": 1.778166888056998e-05, "loss": 0.3518, "step": 5336, "task_loss": 0.679702639579773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3285251259803772, "epoch": 4.51, "learning_rate": 1.777563096244415e-05, "loss": 0.3158, "step": 5337, "task_loss": 0.11476276814937592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35734671354293823, "epoch": 4.51, "learning_rate": 1.776959304431832e-05, "loss": 0.4147, "step": 5338, "task_loss": 1.3082064390182495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5004211664199829, "epoch": 4.51, "learning_rate": 1.776355512619249e-05, "loss": 0.3747, "step": 5339, "task_loss": 0.6727070212364197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2664099335670471, "epoch": 4.51, "learning_rate": 1.775751720806666e-05, "loss": 0.3885, "step": 5340, "task_loss": 0.8408298492431641 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42336180806159973, "epoch": 4.51, "learning_rate": 1.7751479289940828e-05, "loss": 0.3983, "step": 5341, "task_loss": 0.6281353831291199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40358757972717285, "epoch": 4.52, "learning_rate": 1.7745441371815e-05, "loss": 0.3711, "step": 5342, "task_loss": 0.10854684561491013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3187665343284607, "epoch": 4.52, "learning_rate": 1.773940345368917e-05, "loss": 0.3889, "step": 5343, "task_loss": 0.08983475714921951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6567447185516357, "epoch": 4.52, "learning_rate": 1.773336553556334e-05, "loss": 0.4437, "step": 5344, "task_loss": 0.8507108688354492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20844581723213196, "epoch": 4.52, "learning_rate": 1.7727327617437507e-05, "loss": 0.3087, "step": 5345, "task_loss": 0.3440888524055481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22032380104064941, "epoch": 4.52, "learning_rate": 1.7721289699311678e-05, "loss": 0.438, "step": 5346, "task_loss": 0.19203723967075348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44369426369667053, "epoch": 4.52, "learning_rate": 1.7715251781185848e-05, "loss": 0.408, "step": 5347, "task_loss": 1.3506633043289185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3247999846935272, "epoch": 4.52, "learning_rate": 1.770921386306002e-05, "loss": 0.3637, "step": 5348, "task_loss": 0.5121880769729614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.375909686088562, "epoch": 4.52, "learning_rate": 1.770317594493419e-05, "loss": 0.3846, "step": 5349, "task_loss": 0.5873537063598633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3435758054256439, "epoch": 4.52, "learning_rate": 1.7697138026808357e-05, "loss": 0.3706, "step": 5350, "task_loss": 0.9540627002716064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24243029952049255, "epoch": 4.52, "learning_rate": 1.7691100108682527e-05, "loss": 0.3798, "step": 5351, "task_loss": 0.4624219834804535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26076582074165344, "epoch": 4.52, "learning_rate": 1.7685062190556698e-05, "loss": 0.3437, "step": 5352, "task_loss": 0.10617595911026001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5793465375900269, "epoch": 4.52, "learning_rate": 1.7679024272430865e-05, "loss": 0.3974, "step": 5353, "task_loss": 0.9897788763046265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3009145259857178, "epoch": 4.53, "learning_rate": 1.767298635430504e-05, "loss": 0.3007, "step": 5354, "task_loss": 0.6938503384590149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28061360120773315, "epoch": 4.53, "learning_rate": 1.7666948436179206e-05, "loss": 0.3453, "step": 5355, "task_loss": 0.4369708001613617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3344886302947998, "epoch": 4.53, "learning_rate": 1.7660910518053377e-05, "loss": 0.3717, "step": 5356, "task_loss": 0.9109060168266296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16366496682167053, "epoch": 4.53, "learning_rate": 1.7654872599927547e-05, "loss": 0.3725, "step": 5357, "task_loss": 0.14528045058250427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6055630445480347, "epoch": 4.53, "learning_rate": 1.7648834681801714e-05, "loss": 0.4293, "step": 5358, "task_loss": 1.0309191942214966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4491041302680969, "epoch": 4.53, "learning_rate": 1.764279676367589e-05, "loss": 0.4169, "step": 5359, "task_loss": 1.1094788312911987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4425193965435028, "epoch": 4.53, "learning_rate": 1.7636758845550056e-05, "loss": 0.414, "step": 5360, "task_loss": 0.8031342029571533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27062904834747314, "epoch": 4.53, "learning_rate": 1.7630720927424223e-05, "loss": 0.2993, "step": 5361, "task_loss": 0.13187824189662933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.507653534412384, "epoch": 4.53, "learning_rate": 1.7624683009298397e-05, "loss": 0.4284, "step": 5362, "task_loss": 0.3551993668079376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46033433079719543, "epoch": 4.53, "learning_rate": 1.7618645091172564e-05, "loss": 0.5156, "step": 5363, "task_loss": 1.2253702878952026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39749085903167725, "epoch": 4.53, "learning_rate": 1.7612607173046734e-05, "loss": 0.3665, "step": 5364, "task_loss": 0.49118587374687195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4747875928878784, "epoch": 4.53, "learning_rate": 1.7606569254920905e-05, "loss": 0.3801, "step": 5365, "task_loss": 0.5014544725418091 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31555700302124023, "epoch": 4.54, "learning_rate": 1.7600531336795072e-05, "loss": 0.3099, "step": 5366, "task_loss": 0.6813902854919434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3368396759033203, "epoch": 4.54, "learning_rate": 1.7594493418669246e-05, "loss": 0.3195, "step": 5367, "task_loss": 0.37217772006988525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2253040075302124, "epoch": 4.54, "learning_rate": 1.7588455500543413e-05, "loss": 0.2757, "step": 5368, "task_loss": 0.19171622395515442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2980825901031494, "epoch": 4.54, "learning_rate": 1.7582417582417584e-05, "loss": 0.4122, "step": 5369, "task_loss": 0.5793156027793884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3221207559108734, "epoch": 4.54, "learning_rate": 1.7576379664291754e-05, "loss": 0.3025, "step": 5370, "task_loss": 0.6263107061386108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36490318179130554, "epoch": 4.54, "learning_rate": 1.757034174616592e-05, "loss": 0.4004, "step": 5371, "task_loss": 0.426070898771286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24094724655151367, "epoch": 4.54, "learning_rate": 1.7564303828040092e-05, "loss": 0.2852, "step": 5372, "task_loss": 0.45473775267601013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25946730375289917, "epoch": 4.54, "learning_rate": 1.7558265909914263e-05, "loss": 0.335, "step": 5373, "task_loss": 0.8338531255722046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2288549691438675, "epoch": 4.54, "learning_rate": 1.755222799178843e-05, "loss": 0.3231, "step": 5374, "task_loss": 0.2956581711769104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3367187976837158, "epoch": 4.54, "learning_rate": 1.7546190073662604e-05, "loss": 0.4234, "step": 5375, "task_loss": 0.3332315981388092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2816900312900543, "epoch": 4.54, "learning_rate": 1.754015215553677e-05, "loss": 0.4622, "step": 5376, "task_loss": 0.07626615464687347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3616325855255127, "epoch": 4.54, "learning_rate": 1.753411423741094e-05, "loss": 0.3651, "step": 5377, "task_loss": 0.43590912222862244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26656574010849, "epoch": 4.55, "learning_rate": 1.7528076319285112e-05, "loss": 0.3319, "step": 5378, "task_loss": 0.42208003997802734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44225651025772095, "epoch": 4.55, "learning_rate": 1.752203840115928e-05, "loss": 0.45, "step": 5379, "task_loss": 0.300481915473938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6461684107780457, "epoch": 4.55, "learning_rate": 1.751600048303345e-05, "loss": 0.4632, "step": 5380, "task_loss": 0.6893592476844788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37645918130874634, "epoch": 4.55, "learning_rate": 1.750996256490762e-05, "loss": 0.3959, "step": 5381, "task_loss": 0.7296290993690491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.335674911737442, "epoch": 4.55, "learning_rate": 1.750392464678179e-05, "loss": 0.2941, "step": 5382, "task_loss": 0.448097825050354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3674439489841461, "epoch": 4.55, "learning_rate": 1.749788672865596e-05, "loss": 0.412, "step": 5383, "task_loss": 0.8867709636688232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37540245056152344, "epoch": 4.55, "learning_rate": 1.749184881053013e-05, "loss": 0.3885, "step": 5384, "task_loss": 0.2022627294063568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3429049849510193, "epoch": 4.55, "learning_rate": 1.74858108924043e-05, "loss": 0.4619, "step": 5385, "task_loss": 1.0855388641357422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30768972635269165, "epoch": 4.55, "learning_rate": 1.747977297427847e-05, "loss": 0.4295, "step": 5386, "task_loss": 1.0187420845031738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2213277518749237, "epoch": 4.55, "learning_rate": 1.747373505615264e-05, "loss": 0.3973, "step": 5387, "task_loss": 1.5184216499328613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41475909948349, "epoch": 4.55, "learning_rate": 1.7467697138026808e-05, "loss": 0.4533, "step": 5388, "task_loss": 1.0703476667404175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5757153034210205, "epoch": 4.56, "learning_rate": 1.746165921990098e-05, "loss": 0.3572, "step": 5389, "task_loss": 1.390520691871643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6391990184783936, "epoch": 4.56, "learning_rate": 1.745562130177515e-05, "loss": 0.5671, "step": 5390, "task_loss": 2.231200933456421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4100692868232727, "epoch": 4.56, "learning_rate": 1.7449583383649316e-05, "loss": 0.4108, "step": 5391, "task_loss": 1.2216544151306152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30174511671066284, "epoch": 4.56, "learning_rate": 1.744354546552349e-05, "loss": 0.3496, "step": 5392, "task_loss": 0.286838561296463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20696011185646057, "epoch": 4.56, "learning_rate": 1.7437507547397657e-05, "loss": 0.3522, "step": 5393, "task_loss": 1.0670139789581299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30793559551239014, "epoch": 4.56, "learning_rate": 1.7431469629271828e-05, "loss": 0.4515, "step": 5394, "task_loss": 0.4084318280220032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34331244230270386, "epoch": 4.56, "learning_rate": 1.7425431711146e-05, "loss": 0.3463, "step": 5395, "task_loss": 0.5150569081306458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17209069430828094, "epoch": 4.56, "learning_rate": 1.7419393793020166e-05, "loss": 0.3752, "step": 5396, "task_loss": 0.351407527923584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36956608295440674, "epoch": 4.56, "learning_rate": 1.741335587489434e-05, "loss": 0.4957, "step": 5397, "task_loss": 1.0017306804656982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36584150791168213, "epoch": 4.56, "learning_rate": 1.7407317956768507e-05, "loss": 0.3382, "step": 5398, "task_loss": 1.3867346048355103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1878511905670166, "epoch": 4.56, "learning_rate": 1.7401280038642674e-05, "loss": 0.3048, "step": 5399, "task_loss": 0.1377219408750534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15198372304439545, "epoch": 4.56, "learning_rate": 1.7395242120516848e-05, "loss": 0.3762, "step": 5400, "task_loss": 0.27353692054748535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3788922131061554, "epoch": 4.57, "learning_rate": 1.7389204202391015e-05, "loss": 0.3766, "step": 5401, "task_loss": 0.28014230728149414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34905317425727844, "epoch": 4.57, "learning_rate": 1.7383166284265186e-05, "loss": 0.4068, "step": 5402, "task_loss": 0.48892942070961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5510030388832092, "epoch": 4.57, "learning_rate": 1.7377128366139356e-05, "loss": 0.4034, "step": 5403, "task_loss": 0.18187032639980316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4822460114955902, "epoch": 4.57, "learning_rate": 1.7371090448013523e-05, "loss": 0.5338, "step": 5404, "task_loss": 0.707746684551239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3051389455795288, "epoch": 4.57, "learning_rate": 1.7365052529887697e-05, "loss": 0.3044, "step": 5405, "task_loss": 0.06330026686191559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3385714888572693, "epoch": 4.57, "learning_rate": 1.7359014611761865e-05, "loss": 0.3657, "step": 5406, "task_loss": 1.3482164144515991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2842170298099518, "epoch": 4.57, "learning_rate": 1.7352976693636035e-05, "loss": 0.3707, "step": 5407, "task_loss": 0.891694962978363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5710374116897583, "epoch": 4.57, "learning_rate": 1.7346938775510206e-05, "loss": 0.3956, "step": 5408, "task_loss": 0.734816312789917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22824808955192566, "epoch": 4.57, "learning_rate": 1.7340900857384373e-05, "loss": 0.4239, "step": 5409, "task_loss": 0.21285134553909302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1376221626996994, "epoch": 4.57, "learning_rate": 1.7334862939258543e-05, "loss": 0.3538, "step": 5410, "task_loss": 0.4664834141731262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21381130814552307, "epoch": 4.57, "learning_rate": 1.7328825021132714e-05, "loss": 0.3752, "step": 5411, "task_loss": 0.29769837856292725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4739810526371002, "epoch": 4.57, "learning_rate": 1.7322787103006885e-05, "loss": 0.3942, "step": 5412, "task_loss": 1.149025321006775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24812214076519012, "epoch": 4.58, "learning_rate": 1.7316749184881055e-05, "loss": 0.3782, "step": 5413, "task_loss": 0.4133117198944092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.340928316116333, "epoch": 4.58, "learning_rate": 1.7310711266755222e-05, "loss": 0.3339, "step": 5414, "task_loss": 0.5068440437316895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3356567919254303, "epoch": 4.58, "learning_rate": 1.7304673348629393e-05, "loss": 0.2944, "step": 5415, "task_loss": 0.5175650715827942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27113890647888184, "epoch": 4.58, "learning_rate": 1.7298635430503563e-05, "loss": 0.369, "step": 5416, "task_loss": 0.3718273937702179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6226467490196228, "epoch": 4.58, "learning_rate": 1.7292597512377734e-05, "loss": 0.5909, "step": 5417, "task_loss": 0.8846092224121094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32897746562957764, "epoch": 4.58, "learning_rate": 1.72865595942519e-05, "loss": 0.4856, "step": 5418, "task_loss": 0.6417104005813599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4244253635406494, "epoch": 4.58, "learning_rate": 1.7280521676126072e-05, "loss": 0.451, "step": 5419, "task_loss": 0.8053848147392273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5191563963890076, "epoch": 4.58, "learning_rate": 1.7274483758000242e-05, "loss": 0.4529, "step": 5420, "task_loss": 0.4423121213912964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46603065729141235, "epoch": 4.58, "learning_rate": 1.7268445839874413e-05, "loss": 0.4147, "step": 5421, "task_loss": 0.7992224097251892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36699968576431274, "epoch": 4.58, "learning_rate": 1.7262407921748584e-05, "loss": 0.3862, "step": 5422, "task_loss": 0.41774675250053406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1456504911184311, "epoch": 4.58, "learning_rate": 1.725637000362275e-05, "loss": 0.3122, "step": 5423, "task_loss": 0.5766501426696777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3097543716430664, "epoch": 4.58, "learning_rate": 1.725033208549692e-05, "loss": 0.4149, "step": 5424, "task_loss": 0.11654257029294968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22183722257614136, "epoch": 4.59, "learning_rate": 1.7244294167371092e-05, "loss": 0.3131, "step": 5425, "task_loss": 0.35902518033981323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2206316739320755, "epoch": 4.59, "learning_rate": 1.723825624924526e-05, "loss": 0.2841, "step": 5426, "task_loss": 0.19743886590003967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26833391189575195, "epoch": 4.59, "learning_rate": 1.7232218331119433e-05, "loss": 0.3277, "step": 5427, "task_loss": 1.3691775798797607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3365195095539093, "epoch": 4.59, "learning_rate": 1.72261804129936e-05, "loss": 0.3338, "step": 5428, "task_loss": 0.11196917295455933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5527483820915222, "epoch": 4.59, "learning_rate": 1.722014249486777e-05, "loss": 0.4168, "step": 5429, "task_loss": 0.37216195464134216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.51743084192276, "epoch": 4.59, "learning_rate": 1.721410457674194e-05, "loss": 0.3595, "step": 5430, "task_loss": 0.5802571177482605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34776705503463745, "epoch": 4.59, "learning_rate": 1.720806665861611e-05, "loss": 0.3029, "step": 5431, "task_loss": 0.5347296595573425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23763814568519592, "epoch": 4.59, "learning_rate": 1.7202028740490282e-05, "loss": 0.28, "step": 5432, "task_loss": 0.20346134901046753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3065599799156189, "epoch": 4.59, "learning_rate": 1.719599082236445e-05, "loss": 0.3966, "step": 5433, "task_loss": 0.5789757966995239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2785662114620209, "epoch": 4.59, "learning_rate": 1.7189952904238617e-05, "loss": 0.415, "step": 5434, "task_loss": 0.7965628504753113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36551737785339355, "epoch": 4.59, "learning_rate": 1.718391498611279e-05, "loss": 0.3512, "step": 5435, "task_loss": 0.7154884338378906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33005982637405396, "epoch": 4.59, "learning_rate": 1.7177877067986958e-05, "loss": 0.3176, "step": 5436, "task_loss": 1.164353609085083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41143175959587097, "epoch": 4.6, "learning_rate": 1.717183914986113e-05, "loss": 0.2762, "step": 5437, "task_loss": 0.9940332174301147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5729286074638367, "epoch": 4.6, "learning_rate": 1.71658012317353e-05, "loss": 0.4032, "step": 5438, "task_loss": 0.7186647653579712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2184518575668335, "epoch": 4.6, "learning_rate": 1.7159763313609466e-05, "loss": 0.3274, "step": 5439, "task_loss": 0.5065006613731384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5037346482276917, "epoch": 4.6, "learning_rate": 1.715372539548364e-05, "loss": 0.5285, "step": 5440, "task_loss": 0.537757396697998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49823200702667236, "epoch": 4.6, "learning_rate": 1.7147687477357807e-05, "loss": 0.3827, "step": 5441, "task_loss": 0.8727835416793823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4870235025882721, "epoch": 4.6, "learning_rate": 1.7141649559231978e-05, "loss": 0.3605, "step": 5442, "task_loss": 0.6830419301986694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27274006605148315, "epoch": 4.6, "learning_rate": 1.713561164110615e-05, "loss": 0.357, "step": 5443, "task_loss": 0.12779302895069122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7263810634613037, "epoch": 4.6, "learning_rate": 1.7129573722980316e-05, "loss": 0.4687, "step": 5444, "task_loss": 0.5574979186058044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28415727615356445, "epoch": 4.6, "learning_rate": 1.7123535804854486e-05, "loss": 0.3517, "step": 5445, "task_loss": 0.23911257088184357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5205274224281311, "epoch": 4.6, "learning_rate": 1.7117497886728657e-05, "loss": 0.4412, "step": 5446, "task_loss": 0.7083708047866821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3142707347869873, "epoch": 4.6, "learning_rate": 1.7111459968602827e-05, "loss": 0.394, "step": 5447, "task_loss": 0.4441191852092743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31522002816200256, "epoch": 4.6, "learning_rate": 1.7105422050476995e-05, "loss": 0.3537, "step": 5448, "task_loss": 0.4821171760559082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47771045565605164, "epoch": 4.61, "learning_rate": 1.7099384132351165e-05, "loss": 0.4005, "step": 5449, "task_loss": 0.6810716390609741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39250755310058594, "epoch": 4.61, "learning_rate": 1.7093346214225336e-05, "loss": 0.3716, "step": 5450, "task_loss": 0.4585627615451813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32053297758102417, "epoch": 4.61, "learning_rate": 1.7087308296099506e-05, "loss": 0.3505, "step": 5451, "task_loss": 0.828304648399353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2941261827945709, "epoch": 4.61, "learning_rate": 1.7081270377973677e-05, "loss": 0.3381, "step": 5452, "task_loss": 0.873328447341919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47729188203811646, "epoch": 4.61, "learning_rate": 1.7075232459847844e-05, "loss": 0.4239, "step": 5453, "task_loss": 0.7758893966674805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4117157757282257, "epoch": 4.61, "learning_rate": 1.7069194541722015e-05, "loss": 0.4191, "step": 5454, "task_loss": 1.1193768978118896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3602403402328491, "epoch": 4.61, "learning_rate": 1.7063156623596185e-05, "loss": 0.5052, "step": 5455, "task_loss": 1.1129984855651855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24884316325187683, "epoch": 4.61, "learning_rate": 1.7057118705470352e-05, "loss": 0.3965, "step": 5456, "task_loss": 0.026654182001948357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3016606569290161, "epoch": 4.61, "learning_rate": 1.7051080787344526e-05, "loss": 0.3321, "step": 5457, "task_loss": 0.8403924703598022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31170719861984253, "epoch": 4.61, "learning_rate": 1.7045042869218694e-05, "loss": 0.2957, "step": 5458, "task_loss": 0.7098709940910339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5106058120727539, "epoch": 4.61, "learning_rate": 1.7039004951092864e-05, "loss": 0.4326, "step": 5459, "task_loss": 1.2950868606567383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17888377606868744, "epoch": 4.61, "learning_rate": 1.7032967032967035e-05, "loss": 0.4549, "step": 5460, "task_loss": 0.3724541664123535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3140276074409485, "epoch": 4.62, "learning_rate": 1.7026929114841202e-05, "loss": 0.4344, "step": 5461, "task_loss": 0.13338294625282288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3062554895877838, "epoch": 4.62, "learning_rate": 1.7020891196715376e-05, "loss": 0.4023, "step": 5462, "task_loss": 1.0261800289154053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5453802943229675, "epoch": 4.62, "learning_rate": 1.7014853278589543e-05, "loss": 0.4143, "step": 5463, "task_loss": 1.593195915222168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3254993259906769, "epoch": 4.62, "learning_rate": 1.700881536046371e-05, "loss": 0.3495, "step": 5464, "task_loss": 0.5125510692596436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17711149156093597, "epoch": 4.62, "learning_rate": 1.7002777442337884e-05, "loss": 0.4025, "step": 5465, "task_loss": 0.4269407391548157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3585367798805237, "epoch": 4.62, "learning_rate": 1.699673952421205e-05, "loss": 0.4398, "step": 5466, "task_loss": 0.7237930297851562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39334070682525635, "epoch": 4.62, "learning_rate": 1.6990701606086222e-05, "loss": 0.2577, "step": 5467, "task_loss": 0.6496084332466125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5265402793884277, "epoch": 4.62, "learning_rate": 1.6984663687960393e-05, "loss": 0.4643, "step": 5468, "task_loss": 0.2506779432296753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2218146026134491, "epoch": 4.62, "learning_rate": 1.697862576983456e-05, "loss": 0.3442, "step": 5469, "task_loss": 0.09011900424957275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3207870423793793, "epoch": 4.62, "learning_rate": 1.6972587851708734e-05, "loss": 0.3437, "step": 5470, "task_loss": 0.7752838134765625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4146147668361664, "epoch": 4.62, "learning_rate": 1.69665499335829e-05, "loss": 0.3739, "step": 5471, "task_loss": 0.301130473613739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31957316398620605, "epoch": 4.63, "learning_rate": 1.696051201545707e-05, "loss": 0.3687, "step": 5472, "task_loss": 0.78006911277771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5944710373878479, "epoch": 4.63, "learning_rate": 1.6954474097331242e-05, "loss": 0.4582, "step": 5473, "task_loss": 0.6780087351799011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4588538408279419, "epoch": 4.63, "learning_rate": 1.694843617920541e-05, "loss": 0.4269, "step": 5474, "task_loss": 0.3047696352005005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20740975439548492, "epoch": 4.63, "learning_rate": 1.694239826107958e-05, "loss": 0.3259, "step": 5475, "task_loss": 0.17102204263210297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19572341442108154, "epoch": 4.63, "learning_rate": 1.693636034295375e-05, "loss": 0.5317, "step": 5476, "task_loss": 1.1574126482009888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30245015025138855, "epoch": 4.63, "learning_rate": 1.693032242482792e-05, "loss": 0.3903, "step": 5477, "task_loss": 0.9965812563896179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42095232009887695, "epoch": 4.63, "learning_rate": 1.692428450670209e-05, "loss": 0.4331, "step": 5478, "task_loss": 0.6878898739814758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5937291979789734, "epoch": 4.63, "learning_rate": 1.691824658857626e-05, "loss": 0.4077, "step": 5479, "task_loss": 0.552997887134552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5670665502548218, "epoch": 4.63, "learning_rate": 1.691220867045043e-05, "loss": 0.42, "step": 5480, "task_loss": 0.7749813795089722 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2674107551574707, "epoch": 4.63, "learning_rate": 1.69061707523246e-05, "loss": 0.3736, "step": 5481, "task_loss": 0.44413384795188904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3676900863647461, "epoch": 4.63, "learning_rate": 1.690013283419877e-05, "loss": 0.3348, "step": 5482, "task_loss": 0.3886657953262329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2284901738166809, "epoch": 4.63, "learning_rate": 1.6894094916072938e-05, "loss": 0.3066, "step": 5483, "task_loss": 1.295309066772461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2577158510684967, "epoch": 4.64, "learning_rate": 1.6888056997947108e-05, "loss": 0.3165, "step": 5484, "task_loss": 0.6422187089920044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3983737528324127, "epoch": 4.64, "learning_rate": 1.688201907982128e-05, "loss": 0.3709, "step": 5485, "task_loss": 0.592012882232666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48137906193733215, "epoch": 4.64, "learning_rate": 1.687598116169545e-05, "loss": 0.3585, "step": 5486, "task_loss": 0.544343888759613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44034165143966675, "epoch": 4.64, "learning_rate": 1.686994324356962e-05, "loss": 0.396, "step": 5487, "task_loss": 0.4379546344280243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24586822092533112, "epoch": 4.64, "learning_rate": 1.6863905325443787e-05, "loss": 0.3988, "step": 5488, "task_loss": 0.9268527030944824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37214624881744385, "epoch": 4.64, "learning_rate": 1.6857867407317958e-05, "loss": 0.3414, "step": 5489, "task_loss": 0.4172840416431427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5136826038360596, "epoch": 4.64, "learning_rate": 1.6851829489192128e-05, "loss": 0.3684, "step": 5490, "task_loss": 1.3969645500183105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26278749108314514, "epoch": 4.64, "learning_rate": 1.6845791571066295e-05, "loss": 0.3704, "step": 5491, "task_loss": 0.3249305784702301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27378904819488525, "epoch": 4.64, "learning_rate": 1.683975365294047e-05, "loss": 0.4268, "step": 5492, "task_loss": 0.36223387718200684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5738267302513123, "epoch": 4.64, "learning_rate": 1.6833715734814636e-05, "loss": 0.398, "step": 5493, "task_loss": 0.606329619884491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40485629439353943, "epoch": 4.64, "learning_rate": 1.6827677816688807e-05, "loss": 0.4001, "step": 5494, "task_loss": 0.5645108222961426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3212580680847168, "epoch": 4.64, "learning_rate": 1.6821639898562978e-05, "loss": 0.3512, "step": 5495, "task_loss": 0.6522457599639893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22287338972091675, "epoch": 4.65, "learning_rate": 1.6815601980437145e-05, "loss": 0.3335, "step": 5496, "task_loss": 1.101863980293274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24942351877689362, "epoch": 4.65, "learning_rate": 1.680956406231132e-05, "loss": 0.3684, "step": 5497, "task_loss": 1.0606424808502197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2933821380138397, "epoch": 4.65, "learning_rate": 1.6803526144185486e-05, "loss": 0.2516, "step": 5498, "task_loss": 1.0214073657989502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27707695960998535, "epoch": 4.65, "learning_rate": 1.6797488226059653e-05, "loss": 0.2706, "step": 5499, "task_loss": 0.3243105113506317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3124169111251831, "epoch": 4.65, "learning_rate": 1.6791450307933827e-05, "loss": 0.3308, "step": 5500, "task_loss": 0.556459367275238 }, { "epoch": 4.65, "eval_accuracy": 0.9158811881188119, "eval_loss": 0.24390991032123566, "eval_runtime": 314.124, "eval_samples_per_second": 80.382, "eval_steps_per_second": 0.63, "step": 5500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38561713695526123, "epoch": 4.65, "learning_rate": 1.6785412389807994e-05, "loss": 0.3806, "step": 5501, "task_loss": 0.4075198769569397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39028269052505493, "epoch": 4.65, "learning_rate": 1.6779374471682165e-05, "loss": 0.3798, "step": 5502, "task_loss": 0.5047093629837036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2040516585111618, "epoch": 4.65, "learning_rate": 1.6773336553556335e-05, "loss": 0.3614, "step": 5503, "task_loss": 1.2375210523605347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3575834035873413, "epoch": 4.65, "learning_rate": 1.6767298635430503e-05, "loss": 0.38, "step": 5504, "task_loss": 0.488452672958374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31673672795295715, "epoch": 4.65, "learning_rate": 1.6761260717304677e-05, "loss": 0.4555, "step": 5505, "task_loss": 0.32613152265548706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2783457040786743, "epoch": 4.65, "learning_rate": 1.6755222799178844e-05, "loss": 0.2998, "step": 5506, "task_loss": 0.4563921093940735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3486720323562622, "epoch": 4.65, "learning_rate": 1.6749184881053014e-05, "loss": 0.3805, "step": 5507, "task_loss": 0.6171669363975525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25495752692222595, "epoch": 4.66, "learning_rate": 1.6743146962927185e-05, "loss": 0.3544, "step": 5508, "task_loss": 1.2256313562393188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4457526206970215, "epoch": 4.66, "learning_rate": 1.6737109044801352e-05, "loss": 0.383, "step": 5509, "task_loss": 0.46839314699172974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21624276041984558, "epoch": 4.66, "learning_rate": 1.6731071126675523e-05, "loss": 0.3153, "step": 5510, "task_loss": 0.37507396936416626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4555862545967102, "epoch": 4.66, "learning_rate": 1.6725033208549693e-05, "loss": 0.3527, "step": 5511, "task_loss": 0.5320653319358826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32560378313064575, "epoch": 4.66, "learning_rate": 1.6718995290423864e-05, "loss": 0.4414, "step": 5512, "task_loss": 0.02033126726746559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4194319248199463, "epoch": 4.66, "learning_rate": 1.671295737229803e-05, "loss": 0.4011, "step": 5513, "task_loss": 0.3788720667362213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14869391918182373, "epoch": 4.66, "learning_rate": 1.67069194541722e-05, "loss": 0.2924, "step": 5514, "task_loss": 0.4238344132900238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20553144812583923, "epoch": 4.66, "learning_rate": 1.6700881536046372e-05, "loss": 0.4127, "step": 5515, "task_loss": 0.5418580174446106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17718185484409332, "epoch": 4.66, "learning_rate": 1.6694843617920543e-05, "loss": 0.3172, "step": 5516, "task_loss": 0.5479878187179565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3764761686325073, "epoch": 4.66, "learning_rate": 1.6688805699794713e-05, "loss": 0.4005, "step": 5517, "task_loss": 0.542385995388031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3494150936603546, "epoch": 4.66, "learning_rate": 1.668276778166888e-05, "loss": 0.2741, "step": 5518, "task_loss": 0.7225267887115479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.477494478225708, "epoch": 4.66, "learning_rate": 1.667672986354305e-05, "loss": 0.469, "step": 5519, "task_loss": 0.46674850583076477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2821961045265198, "epoch": 4.67, "learning_rate": 1.667069194541722e-05, "loss": 0.3477, "step": 5520, "task_loss": 0.7123687267303467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20197749137878418, "epoch": 4.67, "learning_rate": 1.666465402729139e-05, "loss": 0.2651, "step": 5521, "task_loss": 0.05679545924067497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2691640257835388, "epoch": 4.67, "learning_rate": 1.6658616109165563e-05, "loss": 0.4059, "step": 5522, "task_loss": 0.5268264412879944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3121335506439209, "epoch": 4.67, "learning_rate": 1.665257819103973e-05, "loss": 0.3137, "step": 5523, "task_loss": 0.5435363054275513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45041435956954956, "epoch": 4.67, "learning_rate": 1.66465402729139e-05, "loss": 0.4203, "step": 5524, "task_loss": 0.11159893125295639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.334434449672699, "epoch": 4.67, "learning_rate": 1.664050235478807e-05, "loss": 0.4036, "step": 5525, "task_loss": 0.16028916835784912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3218253552913666, "epoch": 4.67, "learning_rate": 1.6634464436662238e-05, "loss": 0.3775, "step": 5526, "task_loss": 0.19246014952659607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4027683734893799, "epoch": 4.67, "learning_rate": 1.6628426518536412e-05, "loss": 0.3161, "step": 5527, "task_loss": 1.3494601249694824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44744908809661865, "epoch": 4.67, "learning_rate": 1.662238860041058e-05, "loss": 0.3683, "step": 5528, "task_loss": 0.9114257097244263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.310538113117218, "epoch": 4.67, "learning_rate": 1.6616350682284747e-05, "loss": 0.4344, "step": 5529, "task_loss": 0.25601622462272644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25573426485061646, "epoch": 4.67, "learning_rate": 1.661031276415892e-05, "loss": 0.3883, "step": 5530, "task_loss": 0.638984739780426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49917536973953247, "epoch": 4.67, "learning_rate": 1.6604274846033088e-05, "loss": 0.357, "step": 5531, "task_loss": 0.9876900315284729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4920165538787842, "epoch": 4.68, "learning_rate": 1.6598236927907258e-05, "loss": 0.4407, "step": 5532, "task_loss": 0.6449463367462158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36652958393096924, "epoch": 4.68, "learning_rate": 1.659219900978143e-05, "loss": 0.2669, "step": 5533, "task_loss": 0.11038838326931 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2970685064792633, "epoch": 4.68, "learning_rate": 1.6586161091655596e-05, "loss": 0.3622, "step": 5534, "task_loss": 0.39715996384620667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29115450382232666, "epoch": 4.68, "learning_rate": 1.658012317352977e-05, "loss": 0.3791, "step": 5535, "task_loss": 0.4258503019809723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20562687516212463, "epoch": 4.68, "learning_rate": 1.6574085255403937e-05, "loss": 0.3652, "step": 5536, "task_loss": 1.0886890888214111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5461781024932861, "epoch": 4.68, "learning_rate": 1.6568047337278108e-05, "loss": 0.3606, "step": 5537, "task_loss": 0.6905860900878906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35028162598609924, "epoch": 4.68, "learning_rate": 1.6562009419152278e-05, "loss": 0.4001, "step": 5538, "task_loss": 0.5479485392570496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2547874450683594, "epoch": 4.68, "learning_rate": 1.6555971501026445e-05, "loss": 0.4332, "step": 5539, "task_loss": 1.1151905059814453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37428468465805054, "epoch": 4.68, "learning_rate": 1.6549933582900616e-05, "loss": 0.4661, "step": 5540, "task_loss": 0.5161199569702148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3050529956817627, "epoch": 4.68, "learning_rate": 1.6543895664774787e-05, "loss": 0.2947, "step": 5541, "task_loss": 0.4735046327114105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16775918006896973, "epoch": 4.68, "learning_rate": 1.6537857746648957e-05, "loss": 0.4436, "step": 5542, "task_loss": 1.1156505346298218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36521023511886597, "epoch": 4.69, "learning_rate": 1.6531819828523128e-05, "loss": 0.4209, "step": 5543, "task_loss": 1.0567935705184937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2969929575920105, "epoch": 4.69, "learning_rate": 1.6525781910397295e-05, "loss": 0.3052, "step": 5544, "task_loss": 0.41372281312942505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22777098417282104, "epoch": 4.69, "learning_rate": 1.6519743992271465e-05, "loss": 0.2643, "step": 5545, "task_loss": 0.4025246500968933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25484979152679443, "epoch": 4.69, "learning_rate": 1.6513706074145636e-05, "loss": 0.3102, "step": 5546, "task_loss": 0.3582947552204132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32560816407203674, "epoch": 4.69, "learning_rate": 1.6507668156019807e-05, "loss": 0.2806, "step": 5547, "task_loss": 0.49334514141082764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1791953146457672, "epoch": 4.69, "learning_rate": 1.6501630237893974e-05, "loss": 0.4013, "step": 5548, "task_loss": 0.6656315922737122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23227302730083466, "epoch": 4.69, "learning_rate": 1.6495592319768144e-05, "loss": 0.2731, "step": 5549, "task_loss": 0.709033191204071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7226852178573608, "epoch": 4.69, "learning_rate": 1.6489554401642315e-05, "loss": 0.4432, "step": 5550, "task_loss": 0.4622979164123535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.293008029460907, "epoch": 4.69, "learning_rate": 1.6483516483516486e-05, "loss": 0.3015, "step": 5551, "task_loss": 0.7518520355224609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4477537274360657, "epoch": 4.69, "learning_rate": 1.6477478565390653e-05, "loss": 0.3919, "step": 5552, "task_loss": 0.772212028503418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.345295786857605, "epoch": 4.69, "learning_rate": 1.6471440647264823e-05, "loss": 0.4308, "step": 5553, "task_loss": 1.1015520095825195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4386223256587982, "epoch": 4.69, "learning_rate": 1.6465402729138994e-05, "loss": 0.3316, "step": 5554, "task_loss": 0.32797950506210327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3854101896286011, "epoch": 4.7, "learning_rate": 1.6459364811013164e-05, "loss": 0.3914, "step": 5555, "task_loss": 0.6807112097740173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39694273471832275, "epoch": 4.7, "learning_rate": 1.645332689288733e-05, "loss": 0.4262, "step": 5556, "task_loss": 0.49112194776535034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2842100262641907, "epoch": 4.7, "learning_rate": 1.6447288974761502e-05, "loss": 0.3411, "step": 5557, "task_loss": 0.3123435378074646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2979647219181061, "epoch": 4.7, "learning_rate": 1.6441251056635673e-05, "loss": 0.3648, "step": 5558, "task_loss": 0.4727555811405182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20820093154907227, "epoch": 4.7, "learning_rate": 1.6435213138509843e-05, "loss": 0.2809, "step": 5559, "task_loss": 0.3144432604312897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28307104110717773, "epoch": 4.7, "learning_rate": 1.6429175220384014e-05, "loss": 0.4298, "step": 5560, "task_loss": 0.7429260611534119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27154427766799927, "epoch": 4.7, "learning_rate": 1.642313730225818e-05, "loss": 0.3826, "step": 5561, "task_loss": 0.5165733695030212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3032211363315582, "epoch": 4.7, "learning_rate": 1.641709938413235e-05, "loss": 0.3939, "step": 5562, "task_loss": 0.5300989151000977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3834559917449951, "epoch": 4.7, "learning_rate": 1.6411061466006522e-05, "loss": 0.397, "step": 5563, "task_loss": 0.6551104784011841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45493507385253906, "epoch": 4.7, "learning_rate": 1.640502354788069e-05, "loss": 0.4174, "step": 5564, "task_loss": 0.11836820840835571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3081902861595154, "epoch": 4.7, "learning_rate": 1.6398985629754863e-05, "loss": 0.3863, "step": 5565, "task_loss": 0.6248270869255066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.336664080619812, "epoch": 4.7, "learning_rate": 1.639294771162903e-05, "loss": 0.3925, "step": 5566, "task_loss": 0.610039472579956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2832680940628052, "epoch": 4.71, "learning_rate": 1.63869097935032e-05, "loss": 0.2959, "step": 5567, "task_loss": 0.46018514037132263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3970176577568054, "epoch": 4.71, "learning_rate": 1.638087187537737e-05, "loss": 0.4198, "step": 5568, "task_loss": 0.2062007635831833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3441285490989685, "epoch": 4.71, "learning_rate": 1.637483395725154e-05, "loss": 0.4167, "step": 5569, "task_loss": 0.36459609866142273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27104848623275757, "epoch": 4.71, "learning_rate": 1.636879603912571e-05, "loss": 0.2763, "step": 5570, "task_loss": 0.550696611404419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29360172152519226, "epoch": 4.71, "learning_rate": 1.636275812099988e-05, "loss": 0.3838, "step": 5571, "task_loss": 0.5732907652854919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5402824282646179, "epoch": 4.71, "learning_rate": 1.6356720202874047e-05, "loss": 0.4506, "step": 5572, "task_loss": 0.9312769174575806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3396855294704437, "epoch": 4.71, "learning_rate": 1.635068228474822e-05, "loss": 0.4055, "step": 5573, "task_loss": 0.8282003402709961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3851912021636963, "epoch": 4.71, "learning_rate": 1.634464436662239e-05, "loss": 0.2916, "step": 5574, "task_loss": 0.3338393270969391 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34695595502853394, "epoch": 4.71, "learning_rate": 1.633860644849656e-05, "loss": 0.4282, "step": 5575, "task_loss": 0.9439924955368042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2910234332084656, "epoch": 4.71, "learning_rate": 1.633256853037073e-05, "loss": 0.3068, "step": 5576, "task_loss": 0.19757166504859924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31525301933288574, "epoch": 4.71, "learning_rate": 1.6326530612244897e-05, "loss": 0.3578, "step": 5577, "task_loss": 0.6986369490623474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3806527256965637, "epoch": 4.71, "learning_rate": 1.6320492694119067e-05, "loss": 0.3515, "step": 5578, "task_loss": 0.6207656860351562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4525027573108673, "epoch": 4.72, "learning_rate": 1.6314454775993238e-05, "loss": 0.3942, "step": 5579, "task_loss": 1.0910580158233643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23645779490470886, "epoch": 4.72, "learning_rate": 1.630841685786741e-05, "loss": 0.3279, "step": 5580, "task_loss": 0.1877736896276474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1976706087589264, "epoch": 4.72, "learning_rate": 1.630237893974158e-05, "loss": 0.2948, "step": 5581, "task_loss": 0.19508562982082367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3213695287704468, "epoch": 4.72, "learning_rate": 1.6296341021615746e-05, "loss": 0.3678, "step": 5582, "task_loss": 0.5433447360992432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2761138379573822, "epoch": 4.72, "learning_rate": 1.6290303103489917e-05, "loss": 0.4234, "step": 5583, "task_loss": 0.4992988407611847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4564442038536072, "epoch": 4.72, "learning_rate": 1.6284265185364087e-05, "loss": 0.4361, "step": 5584, "task_loss": 1.313501238822937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.593304455280304, "epoch": 4.72, "learning_rate": 1.6278227267238258e-05, "loss": 0.3994, "step": 5585, "task_loss": 1.355379581451416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36121469736099243, "epoch": 4.72, "learning_rate": 1.6272189349112425e-05, "loss": 0.4333, "step": 5586, "task_loss": 0.5488883852958679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2626327574253082, "epoch": 4.72, "learning_rate": 1.6266151430986596e-05, "loss": 0.349, "step": 5587, "task_loss": 0.42929330468177795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3003905415534973, "epoch": 4.72, "learning_rate": 1.6260113512860766e-05, "loss": 0.3196, "step": 5588, "task_loss": 1.1411906480789185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4393826723098755, "epoch": 4.72, "learning_rate": 1.6254075594734937e-05, "loss": 0.327, "step": 5589, "task_loss": 0.30839571356773376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32307344675064087, "epoch": 4.72, "learning_rate": 1.6248037676609107e-05, "loss": 0.3067, "step": 5590, "task_loss": 0.08717489242553711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24922621250152588, "epoch": 4.73, "learning_rate": 1.6241999758483274e-05, "loss": 0.3013, "step": 5591, "task_loss": 1.214216709136963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17806212604045868, "epoch": 4.73, "learning_rate": 1.6235961840357445e-05, "loss": 0.3378, "step": 5592, "task_loss": 0.6361490488052368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20498956739902496, "epoch": 4.73, "learning_rate": 1.6229923922231616e-05, "loss": 0.2747, "step": 5593, "task_loss": 0.775695264339447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18027126789093018, "epoch": 4.73, "learning_rate": 1.6223886004105783e-05, "loss": 0.4211, "step": 5594, "task_loss": 0.6518685817718506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2859079837799072, "epoch": 4.73, "learning_rate": 1.6217848085979957e-05, "loss": 0.4468, "step": 5595, "task_loss": 0.10139435529708862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27772143483161926, "epoch": 4.73, "learning_rate": 1.6211810167854124e-05, "loss": 0.4478, "step": 5596, "task_loss": 0.3756099343299866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3311834931373596, "epoch": 4.73, "learning_rate": 1.6205772249728295e-05, "loss": 0.3566, "step": 5597, "task_loss": 0.9126965403556824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3443014919757843, "epoch": 4.73, "learning_rate": 1.6199734331602465e-05, "loss": 0.3144, "step": 5598, "task_loss": 0.6892973780632019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40705767273902893, "epoch": 4.73, "learning_rate": 1.6193696413476632e-05, "loss": 0.3628, "step": 5599, "task_loss": 0.6138812899589539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3497660160064697, "epoch": 4.73, "learning_rate": 1.6187658495350806e-05, "loss": 0.462, "step": 5600, "task_loss": 0.634336531162262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3888128399848938, "epoch": 4.73, "learning_rate": 1.6181620577224973e-05, "loss": 0.3374, "step": 5601, "task_loss": 0.4613480269908905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26404476165771484, "epoch": 4.73, "learning_rate": 1.617558265909914e-05, "loss": 0.4021, "step": 5602, "task_loss": 0.9909034371376038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17756396532058716, "epoch": 4.74, "learning_rate": 1.6169544740973315e-05, "loss": 0.3284, "step": 5603, "task_loss": 0.03624040260910988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30303046107292175, "epoch": 4.74, "learning_rate": 1.6163506822847482e-05, "loss": 0.2836, "step": 5604, "task_loss": 0.20533332228660583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21211370825767517, "epoch": 4.74, "learning_rate": 1.6157468904721652e-05, "loss": 0.3454, "step": 5605, "task_loss": 1.2314014434814453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23925280570983887, "epoch": 4.74, "learning_rate": 1.6151430986595823e-05, "loss": 0.4419, "step": 5606, "task_loss": 0.6184334754943848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3239653408527374, "epoch": 4.74, "learning_rate": 1.614539306846999e-05, "loss": 0.3295, "step": 5607, "task_loss": 0.21321222186088562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5777931213378906, "epoch": 4.74, "learning_rate": 1.6139355150344164e-05, "loss": 0.4314, "step": 5608, "task_loss": 1.5632389783859253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3376569449901581, "epoch": 4.74, "learning_rate": 1.613331723221833e-05, "loss": 0.3709, "step": 5609, "task_loss": 0.3184061348438263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5598204135894775, "epoch": 4.74, "learning_rate": 1.6127279314092502e-05, "loss": 0.4676, "step": 5610, "task_loss": 1.2127723693847656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5031595826148987, "epoch": 4.74, "learning_rate": 1.6121241395966672e-05, "loss": 0.3843, "step": 5611, "task_loss": 0.9508746862411499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4578322768211365, "epoch": 4.74, "learning_rate": 1.611520347784084e-05, "loss": 0.3929, "step": 5612, "task_loss": 0.8743744492530823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3156386911869049, "epoch": 4.74, "learning_rate": 1.610916555971501e-05, "loss": 0.2423, "step": 5613, "task_loss": 0.49644899368286133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28335776925086975, "epoch": 4.75, "learning_rate": 1.610312764158918e-05, "loss": 0.4289, "step": 5614, "task_loss": 0.7824926376342773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4156298041343689, "epoch": 4.75, "learning_rate": 1.609708972346335e-05, "loss": 0.4162, "step": 5615, "task_loss": 1.231238603591919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5514259338378906, "epoch": 4.75, "learning_rate": 1.6091051805337522e-05, "loss": 0.3851, "step": 5616, "task_loss": 1.0891026258468628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40305209159851074, "epoch": 4.75, "learning_rate": 1.608501388721169e-05, "loss": 0.4103, "step": 5617, "task_loss": 0.8868740797042847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35351046919822693, "epoch": 4.75, "learning_rate": 1.607897596908586e-05, "loss": 0.3885, "step": 5618, "task_loss": 0.3561086058616638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4441511631011963, "epoch": 4.75, "learning_rate": 1.607293805096003e-05, "loss": 0.493, "step": 5619, "task_loss": 1.3952422142028809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38906770944595337, "epoch": 4.75, "learning_rate": 1.60669001328342e-05, "loss": 0.3601, "step": 5620, "task_loss": 0.39539551734924316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4025821387767792, "epoch": 4.75, "learning_rate": 1.6060862214708368e-05, "loss": 0.3717, "step": 5621, "task_loss": 0.5599721074104309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5764844417572021, "epoch": 4.75, "learning_rate": 1.605482429658254e-05, "loss": 0.5302, "step": 5622, "task_loss": 0.4415630102157593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4286494851112366, "epoch": 4.75, "learning_rate": 1.604878637845671e-05, "loss": 0.3607, "step": 5623, "task_loss": 0.3167123794555664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20169469714164734, "epoch": 4.75, "learning_rate": 1.604274846033088e-05, "loss": 0.3217, "step": 5624, "task_loss": 0.6035557389259338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39334824681282043, "epoch": 4.75, "learning_rate": 1.603671054220505e-05, "loss": 0.4199, "step": 5625, "task_loss": 0.8347508311271667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39664509892463684, "epoch": 4.76, "learning_rate": 1.6030672624079217e-05, "loss": 0.3794, "step": 5626, "task_loss": 0.3792988955974579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22550085186958313, "epoch": 4.76, "learning_rate": 1.6024634705953388e-05, "loss": 0.4162, "step": 5627, "task_loss": 0.3858092725276947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35184887051582336, "epoch": 4.76, "learning_rate": 1.601859678782756e-05, "loss": 0.2462, "step": 5628, "task_loss": 0.3817923367023468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3325628936290741, "epoch": 4.76, "learning_rate": 1.6012558869701726e-05, "loss": 0.336, "step": 5629, "task_loss": 0.5259426832199097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4415942430496216, "epoch": 4.76, "learning_rate": 1.60065209515759e-05, "loss": 0.4215, "step": 5630, "task_loss": 0.5331969857215881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.611562192440033, "epoch": 4.76, "learning_rate": 1.6000483033450067e-05, "loss": 0.4642, "step": 5631, "task_loss": 0.5009388327598572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5097861886024475, "epoch": 4.76, "learning_rate": 1.5994445115324237e-05, "loss": 0.4017, "step": 5632, "task_loss": 0.6133516430854797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.13939866423606873, "epoch": 4.76, "learning_rate": 1.5988407197198408e-05, "loss": 0.2304, "step": 5633, "task_loss": 0.25466838479042053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48802921175956726, "epoch": 4.76, "learning_rate": 1.5982369279072575e-05, "loss": 0.4376, "step": 5634, "task_loss": 0.9894362688064575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1880946159362793, "epoch": 4.76, "learning_rate": 1.5976331360946746e-05, "loss": 0.2692, "step": 5635, "task_loss": 0.42923054099082947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30355075001716614, "epoch": 4.76, "learning_rate": 1.5970293442820916e-05, "loss": 0.3054, "step": 5636, "task_loss": 0.37451574206352234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6911126375198364, "epoch": 4.76, "learning_rate": 1.5964255524695084e-05, "loss": 0.3854, "step": 5637, "task_loss": 0.7660987377166748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21854451298713684, "epoch": 4.77, "learning_rate": 1.5958217606569257e-05, "loss": 0.3582, "step": 5638, "task_loss": 0.43335554003715515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2948555648326874, "epoch": 4.77, "learning_rate": 1.5952179688443425e-05, "loss": 0.3096, "step": 5639, "task_loss": 0.8246344327926636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48210564255714417, "epoch": 4.77, "learning_rate": 1.5946141770317595e-05, "loss": 0.3713, "step": 5640, "task_loss": 1.8561969995498657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44248729944229126, "epoch": 4.77, "learning_rate": 1.5940103852191766e-05, "loss": 0.4877, "step": 5641, "task_loss": 0.6365928649902344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.10764037072658539, "epoch": 4.77, "learning_rate": 1.5934065934065933e-05, "loss": 0.3064, "step": 5642, "task_loss": 0.2954387366771698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5038437843322754, "epoch": 4.77, "learning_rate": 1.5928028015940104e-05, "loss": 0.4523, "step": 5643, "task_loss": 0.7628663182258606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16615208983421326, "epoch": 4.77, "learning_rate": 1.5921990097814274e-05, "loss": 0.3462, "step": 5644, "task_loss": 0.78499835729599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3849545419216156, "epoch": 4.77, "learning_rate": 1.5915952179688445e-05, "loss": 0.348, "step": 5645, "task_loss": 1.239438772201538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16065916419029236, "epoch": 4.77, "learning_rate": 1.5909914261562615e-05, "loss": 0.3791, "step": 5646, "task_loss": 0.2946849465370178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3043554127216339, "epoch": 4.77, "learning_rate": 1.5903876343436782e-05, "loss": 0.486, "step": 5647, "task_loss": 0.7241219878196716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4821363091468811, "epoch": 4.77, "learning_rate": 1.5897838425310953e-05, "loss": 0.3823, "step": 5648, "task_loss": 0.5189279913902283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21683624386787415, "epoch": 4.77, "learning_rate": 1.5891800507185124e-05, "loss": 0.4534, "step": 5649, "task_loss": 0.23658373951911926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24522922933101654, "epoch": 4.78, "learning_rate": 1.5885762589059294e-05, "loss": 0.3058, "step": 5650, "task_loss": 0.15289083123207092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20459449291229248, "epoch": 4.78, "learning_rate": 1.587972467093346e-05, "loss": 0.4025, "step": 5651, "task_loss": 0.188956156373024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2857488989830017, "epoch": 4.78, "learning_rate": 1.5873686752807632e-05, "loss": 0.3305, "step": 5652, "task_loss": 1.0842268466949463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28939008712768555, "epoch": 4.78, "learning_rate": 1.5867648834681802e-05, "loss": 0.3769, "step": 5653, "task_loss": 0.4621902108192444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2018595188856125, "epoch": 4.78, "learning_rate": 1.5861610916555973e-05, "loss": 0.2702, "step": 5654, "task_loss": 0.21492311358451843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31273573637008667, "epoch": 4.78, "learning_rate": 1.5855572998430144e-05, "loss": 0.4132, "step": 5655, "task_loss": 0.9140501618385315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3335765600204468, "epoch": 4.78, "learning_rate": 1.584953508030431e-05, "loss": 0.3913, "step": 5656, "task_loss": 1.1372615098953247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2642756402492523, "epoch": 4.78, "learning_rate": 1.584349716217848e-05, "loss": 0.4393, "step": 5657, "task_loss": 0.2776612937450409 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35220596194267273, "epoch": 4.78, "learning_rate": 1.5837459244052652e-05, "loss": 0.4463, "step": 5658, "task_loss": 0.8417865633964539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5648762583732605, "epoch": 4.78, "learning_rate": 1.583142132592682e-05, "loss": 0.4121, "step": 5659, "task_loss": 0.0789581686258316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32707053422927856, "epoch": 4.78, "learning_rate": 1.5825383407800993e-05, "loss": 0.4013, "step": 5660, "task_loss": 0.4861403703689575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21361804008483887, "epoch": 4.78, "learning_rate": 1.581934548967516e-05, "loss": 0.3889, "step": 5661, "task_loss": 0.445682168006897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32838767766952515, "epoch": 4.79, "learning_rate": 1.581330757154933e-05, "loss": 0.3209, "step": 5662, "task_loss": 0.11349904537200928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4814714789390564, "epoch": 4.79, "learning_rate": 1.58072696534235e-05, "loss": 0.3442, "step": 5663, "task_loss": 0.37581145763397217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43586987257003784, "epoch": 4.79, "learning_rate": 1.580123173529767e-05, "loss": 0.3772, "step": 5664, "task_loss": 0.8056401610374451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19433437287807465, "epoch": 4.79, "learning_rate": 1.5795193817171843e-05, "loss": 0.3009, "step": 5665, "task_loss": 0.6627820134162903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5356097221374512, "epoch": 4.79, "learning_rate": 1.578915589904601e-05, "loss": 0.5482, "step": 5666, "task_loss": 0.17102503776550293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16108247637748718, "epoch": 4.79, "learning_rate": 1.5783117980920177e-05, "loss": 0.3068, "step": 5667, "task_loss": 0.41143590211868286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2852683961391449, "epoch": 4.79, "learning_rate": 1.577708006279435e-05, "loss": 0.3368, "step": 5668, "task_loss": 0.11664585024118423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3399009108543396, "epoch": 4.79, "learning_rate": 1.5771042144668518e-05, "loss": 0.3324, "step": 5669, "task_loss": 0.23371511697769165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20107807219028473, "epoch": 4.79, "learning_rate": 1.576500422654269e-05, "loss": 0.4063, "step": 5670, "task_loss": 0.8355715870857239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5867009162902832, "epoch": 4.79, "learning_rate": 1.575896630841686e-05, "loss": 0.6406, "step": 5671, "task_loss": 1.0047637224197388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2885722517967224, "epoch": 4.79, "learning_rate": 1.5752928390291026e-05, "loss": 0.394, "step": 5672, "task_loss": 0.3503754734992981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2582879066467285, "epoch": 4.79, "learning_rate": 1.57468904721652e-05, "loss": 0.3714, "step": 5673, "task_loss": 0.9204216599464417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2840203642845154, "epoch": 4.8, "learning_rate": 1.5740852554039368e-05, "loss": 0.498, "step": 5674, "task_loss": 0.406760573387146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4152895510196686, "epoch": 4.8, "learning_rate": 1.5734814635913538e-05, "loss": 0.399, "step": 5675, "task_loss": 0.5007067918777466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2709985375404358, "epoch": 4.8, "learning_rate": 1.572877671778771e-05, "loss": 0.2839, "step": 5676, "task_loss": 0.3762127459049225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4842902421951294, "epoch": 4.8, "learning_rate": 1.5722738799661876e-05, "loss": 0.3746, "step": 5677, "task_loss": 0.6139999628067017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5326794981956482, "epoch": 4.8, "learning_rate": 1.5716700881536046e-05, "loss": 0.4507, "step": 5678, "task_loss": 0.7465367317199707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2599707543849945, "epoch": 4.8, "learning_rate": 1.5710662963410217e-05, "loss": 0.3522, "step": 5679, "task_loss": 1.6758629083633423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48383790254592896, "epoch": 4.8, "learning_rate": 1.5704625045284388e-05, "loss": 0.3962, "step": 5680, "task_loss": 1.0789330005645752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3971747159957886, "epoch": 4.8, "learning_rate": 1.5698587127158558e-05, "loss": 0.3742, "step": 5681, "task_loss": 0.603631854057312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5577627420425415, "epoch": 4.8, "learning_rate": 1.5692549209032725e-05, "loss": 0.4662, "step": 5682, "task_loss": 0.45365190505981445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.13422133028507233, "epoch": 4.8, "learning_rate": 1.5686511290906896e-05, "loss": 0.2796, "step": 5683, "task_loss": 0.03844856843352318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3776628077030182, "epoch": 4.8, "learning_rate": 1.5680473372781066e-05, "loss": 0.2942, "step": 5684, "task_loss": 0.35623693466186523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32316017150878906, "epoch": 4.81, "learning_rate": 1.5674435454655237e-05, "loss": 0.3764, "step": 5685, "task_loss": 0.6154932379722595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3420259356498718, "epoch": 4.81, "learning_rate": 1.5668397536529404e-05, "loss": 0.3569, "step": 5686, "task_loss": 0.7252346873283386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15052436292171478, "epoch": 4.81, "learning_rate": 1.5662359618403575e-05, "loss": 0.2932, "step": 5687, "task_loss": 0.09908293932676315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4033365845680237, "epoch": 4.81, "learning_rate": 1.5656321700277745e-05, "loss": 0.3321, "step": 5688, "task_loss": 0.4937518537044525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40229010581970215, "epoch": 4.81, "learning_rate": 1.5650283782151916e-05, "loss": 0.3016, "step": 5689, "task_loss": 0.7098235487937927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28638288378715515, "epoch": 4.81, "learning_rate": 1.5644245864026087e-05, "loss": 0.2469, "step": 5690, "task_loss": 0.48285403847694397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.255278617143631, "epoch": 4.81, "learning_rate": 1.5638207945900254e-05, "loss": 0.314, "step": 5691, "task_loss": 1.3174291849136353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30586403608322144, "epoch": 4.81, "learning_rate": 1.5632170027774424e-05, "loss": 0.3933, "step": 5692, "task_loss": 1.3612581491470337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7281320095062256, "epoch": 4.81, "learning_rate": 1.5626132109648595e-05, "loss": 0.4598, "step": 5693, "task_loss": 1.4049265384674072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24793457984924316, "epoch": 4.81, "learning_rate": 1.5620094191522762e-05, "loss": 0.3831, "step": 5694, "task_loss": 0.5205627083778381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4649759531021118, "epoch": 4.81, "learning_rate": 1.5614056273396936e-05, "loss": 0.4288, "step": 5695, "task_loss": 0.8485907316207886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3815685510635376, "epoch": 4.81, "learning_rate": 1.5608018355271103e-05, "loss": 0.3695, "step": 5696, "task_loss": 0.5758039355278015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3919735252857208, "epoch": 4.82, "learning_rate": 1.5601980437145274e-05, "loss": 0.394, "step": 5697, "task_loss": 0.5478452444076538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23320573568344116, "epoch": 4.82, "learning_rate": 1.5595942519019444e-05, "loss": 0.3748, "step": 5698, "task_loss": 0.9182260036468506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18550661206245422, "epoch": 4.82, "learning_rate": 1.558990460089361e-05, "loss": 0.4102, "step": 5699, "task_loss": 0.9942810535430908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35011768341064453, "epoch": 4.82, "learning_rate": 1.5583866682767782e-05, "loss": 0.3098, "step": 5700, "task_loss": 0.488586962223053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3427453339099884, "epoch": 4.82, "learning_rate": 1.5577828764641953e-05, "loss": 0.329, "step": 5701, "task_loss": 0.3768784701824188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39258667826652527, "epoch": 4.82, "learning_rate": 1.557179084651612e-05, "loss": 0.3955, "step": 5702, "task_loss": 0.2113812267780304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.340221107006073, "epoch": 4.82, "learning_rate": 1.5565752928390294e-05, "loss": 0.3572, "step": 5703, "task_loss": 0.7969293594360352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8255331516265869, "epoch": 4.82, "learning_rate": 1.555971501026446e-05, "loss": 0.44, "step": 5704, "task_loss": 0.7273180484771729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35304850339889526, "epoch": 4.82, "learning_rate": 1.555367709213863e-05, "loss": 0.3499, "step": 5705, "task_loss": 0.1878131777048111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4386962652206421, "epoch": 4.82, "learning_rate": 1.5547639174012802e-05, "loss": 0.3749, "step": 5706, "task_loss": 0.7511147260665894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1978558897972107, "epoch": 4.82, "learning_rate": 1.554160125588697e-05, "loss": 0.4954, "step": 5707, "task_loss": 0.7922500967979431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3792358636856079, "epoch": 4.82, "learning_rate": 1.553556333776114e-05, "loss": 0.4154, "step": 5708, "task_loss": 0.7723556160926819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4178386628627777, "epoch": 4.83, "learning_rate": 1.552952541963531e-05, "loss": 0.406, "step": 5709, "task_loss": 0.3834013342857361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25520774722099304, "epoch": 4.83, "learning_rate": 1.552348750150948e-05, "loss": 0.3064, "step": 5710, "task_loss": 0.6633709669113159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2296271026134491, "epoch": 4.83, "learning_rate": 1.551744958338365e-05, "loss": 0.3921, "step": 5711, "task_loss": 0.5670307874679565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46010667085647583, "epoch": 4.83, "learning_rate": 1.551141166525782e-05, "loss": 0.3583, "step": 5712, "task_loss": 0.04134582728147507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3645346760749817, "epoch": 4.83, "learning_rate": 1.550537374713199e-05, "loss": 0.4182, "step": 5713, "task_loss": 0.6594998240470886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33919042348861694, "epoch": 4.83, "learning_rate": 1.549933582900616e-05, "loss": 0.412, "step": 5714, "task_loss": 0.2555519938468933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5652832984924316, "epoch": 4.83, "learning_rate": 1.549329791088033e-05, "loss": 0.3542, "step": 5715, "task_loss": 0.5196648240089417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2517048418521881, "epoch": 4.83, "learning_rate": 1.5487259992754498e-05, "loss": 0.433, "step": 5716, "task_loss": 0.32918840646743774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22670137882232666, "epoch": 4.83, "learning_rate": 1.5481222074628668e-05, "loss": 0.5495, "step": 5717, "task_loss": 0.5073003172874451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18155357241630554, "epoch": 4.83, "learning_rate": 1.547518415650284e-05, "loss": 0.3813, "step": 5718, "task_loss": 0.36423635482788086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39845719933509827, "epoch": 4.83, "learning_rate": 1.546914623837701e-05, "loss": 0.3681, "step": 5719, "task_loss": 0.4257934093475342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40851151943206787, "epoch": 4.83, "learning_rate": 1.546310832025118e-05, "loss": 0.4107, "step": 5720, "task_loss": 0.5878356099128723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47601327300071716, "epoch": 4.84, "learning_rate": 1.5457070402125347e-05, "loss": 0.4177, "step": 5721, "task_loss": 1.3163427114486694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3033120334148407, "epoch": 4.84, "learning_rate": 1.5451032483999518e-05, "loss": 0.3408, "step": 5722, "task_loss": 0.1596004217863083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4110749363899231, "epoch": 4.84, "learning_rate": 1.5444994565873688e-05, "loss": 0.2941, "step": 5723, "task_loss": 0.17972946166992188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36527764797210693, "epoch": 4.84, "learning_rate": 1.5438956647747855e-05, "loss": 0.3844, "step": 5724, "task_loss": 0.7732518911361694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33693450689315796, "epoch": 4.84, "learning_rate": 1.543291872962203e-05, "loss": 0.334, "step": 5725, "task_loss": 0.5477957725524902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3566434979438782, "epoch": 4.84, "learning_rate": 1.5426880811496197e-05, "loss": 0.4223, "step": 5726, "task_loss": 0.5278295278549194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29413801431655884, "epoch": 4.84, "learning_rate": 1.5420842893370367e-05, "loss": 0.4563, "step": 5727, "task_loss": 1.055405616760254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.397945761680603, "epoch": 4.84, "learning_rate": 1.5414804975244538e-05, "loss": 0.3807, "step": 5728, "task_loss": 0.2902117371559143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39485180377960205, "epoch": 4.84, "learning_rate": 1.5408767057118705e-05, "loss": 0.3378, "step": 5729, "task_loss": 0.4362782835960388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26491639018058777, "epoch": 4.84, "learning_rate": 1.5402729138992875e-05, "loss": 0.3572, "step": 5730, "task_loss": 0.27351444959640503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2783477306365967, "epoch": 4.84, "learning_rate": 1.5396691220867046e-05, "loss": 0.386, "step": 5731, "task_loss": 0.6363261342048645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28811898827552795, "epoch": 4.84, "learning_rate": 1.5390653302741213e-05, "loss": 0.4571, "step": 5732, "task_loss": 0.6321510076522827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43284282088279724, "epoch": 4.85, "learning_rate": 1.5384615384615387e-05, "loss": 0.4629, "step": 5733, "task_loss": 0.8742640018463135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3150917589664459, "epoch": 4.85, "learning_rate": 1.5378577466489554e-05, "loss": 0.3037, "step": 5734, "task_loss": 0.9062818288803101 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44884830713272095, "epoch": 4.85, "learning_rate": 1.5372539548363725e-05, "loss": 0.4094, "step": 5735, "task_loss": 1.100426197052002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1640602946281433, "epoch": 4.85, "learning_rate": 1.5366501630237896e-05, "loss": 0.3849, "step": 5736, "task_loss": 0.19177134335041046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2261849045753479, "epoch": 4.85, "learning_rate": 1.5360463712112063e-05, "loss": 0.3073, "step": 5737, "task_loss": 0.1694553643465042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5367703437805176, "epoch": 4.85, "learning_rate": 1.5354425793986237e-05, "loss": 0.5163, "step": 5738, "task_loss": 0.9127690196037292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3820631802082062, "epoch": 4.85, "learning_rate": 1.5348387875860404e-05, "loss": 0.3852, "step": 5739, "task_loss": 0.441042423248291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3345189690589905, "epoch": 4.85, "learning_rate": 1.534234995773457e-05, "loss": 0.3377, "step": 5740, "task_loss": 0.4567604660987854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2947204113006592, "epoch": 4.85, "learning_rate": 1.5336312039608745e-05, "loss": 0.3245, "step": 5741, "task_loss": 0.9514395594596863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23655734956264496, "epoch": 4.85, "learning_rate": 1.5330274121482912e-05, "loss": 0.2857, "step": 5742, "task_loss": 0.5951398015022278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4218357801437378, "epoch": 4.85, "learning_rate": 1.5324236203357083e-05, "loss": 0.4208, "step": 5743, "task_loss": 0.11155934631824493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.555060863494873, "epoch": 4.85, "learning_rate": 1.5318198285231253e-05, "loss": 0.375, "step": 5744, "task_loss": 0.8551012873649597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2642982304096222, "epoch": 4.86, "learning_rate": 1.531216036710542e-05, "loss": 0.3384, "step": 5745, "task_loss": 0.39397504925727844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4091271162033081, "epoch": 4.86, "learning_rate": 1.5306122448979594e-05, "loss": 0.4051, "step": 5746, "task_loss": 1.034899353981018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18189255893230438, "epoch": 4.86, "learning_rate": 1.530008453085376e-05, "loss": 0.4121, "step": 5747, "task_loss": 0.6545719504356384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3321046531200409, "epoch": 4.86, "learning_rate": 1.5294046612727932e-05, "loss": 0.3214, "step": 5748, "task_loss": 1.0141355991363525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43186914920806885, "epoch": 4.86, "learning_rate": 1.5288008694602103e-05, "loss": 0.4206, "step": 5749, "task_loss": 1.232335090637207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25244373083114624, "epoch": 4.86, "learning_rate": 1.528197077647627e-05, "loss": 0.3796, "step": 5750, "task_loss": 0.7504710555076599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39745718240737915, "epoch": 4.86, "learning_rate": 1.527593285835044e-05, "loss": 0.4577, "step": 5751, "task_loss": 0.7572882175445557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21577507257461548, "epoch": 4.86, "learning_rate": 1.526989494022461e-05, "loss": 0.4372, "step": 5752, "task_loss": 1.6254284381866455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2885988652706146, "epoch": 4.86, "learning_rate": 1.526385702209878e-05, "loss": 0.3653, "step": 5753, "task_loss": 0.10668589174747467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2194802314043045, "epoch": 4.86, "learning_rate": 1.525781910397295e-05, "loss": 0.2839, "step": 5754, "task_loss": 0.6449970602989197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2936233878135681, "epoch": 4.86, "learning_rate": 1.525178118584712e-05, "loss": 0.38, "step": 5755, "task_loss": 1.6823714971542358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20574557781219482, "epoch": 4.87, "learning_rate": 1.524574326772129e-05, "loss": 0.3059, "step": 5756, "task_loss": 0.8813347220420837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36154329776763916, "epoch": 4.87, "learning_rate": 1.5239705349595459e-05, "loss": 0.2429, "step": 5757, "task_loss": 0.2567512094974518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2572384476661682, "epoch": 4.87, "learning_rate": 1.5233667431469631e-05, "loss": 0.3564, "step": 5758, "task_loss": 0.7968680262565613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48806503415107727, "epoch": 4.87, "learning_rate": 1.52276295133438e-05, "loss": 0.3874, "step": 5759, "task_loss": 0.9505220055580139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3021008372306824, "epoch": 4.87, "learning_rate": 1.5221591595217969e-05, "loss": 0.4069, "step": 5760, "task_loss": 0.14573611319065094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3162780702114105, "epoch": 4.87, "learning_rate": 1.521555367709214e-05, "loss": 0.3432, "step": 5761, "task_loss": 0.24223163723945618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19340604543685913, "epoch": 4.87, "learning_rate": 1.5209515758966308e-05, "loss": 0.263, "step": 5762, "task_loss": 0.11182047426700592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4432269334793091, "epoch": 4.87, "learning_rate": 1.520347784084048e-05, "loss": 0.303, "step": 5763, "task_loss": 0.3081468641757965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2580684721469879, "epoch": 4.87, "learning_rate": 1.5197439922714648e-05, "loss": 0.3469, "step": 5764, "task_loss": 0.5592470765113831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39691752195358276, "epoch": 4.87, "learning_rate": 1.5191402004588817e-05, "loss": 0.358, "step": 5765, "task_loss": 0.26140308380126953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48123878240585327, "epoch": 4.87, "learning_rate": 1.5185364086462989e-05, "loss": 0.4073, "step": 5766, "task_loss": 1.2389073371887207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.445859432220459, "epoch": 4.87, "learning_rate": 1.5179326168337158e-05, "loss": 0.3918, "step": 5767, "task_loss": 0.9793431758880615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4610786437988281, "epoch": 4.88, "learning_rate": 1.5173288250211328e-05, "loss": 0.4041, "step": 5768, "task_loss": 1.1688976287841797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30062201619148254, "epoch": 4.88, "learning_rate": 1.5167250332085497e-05, "loss": 0.3453, "step": 5769, "task_loss": 0.32422637939453125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21689723432064056, "epoch": 4.88, "learning_rate": 1.5161212413959666e-05, "loss": 0.3065, "step": 5770, "task_loss": 0.0686405822634697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21182042360305786, "epoch": 4.88, "learning_rate": 1.5155174495833838e-05, "loss": 0.3857, "step": 5771, "task_loss": 0.7384429574012756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3995056748390198, "epoch": 4.88, "learning_rate": 1.5149136577708006e-05, "loss": 0.3463, "step": 5772, "task_loss": 1.088712453842163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5352674722671509, "epoch": 4.88, "learning_rate": 1.5143098659582178e-05, "loss": 0.4301, "step": 5773, "task_loss": 0.7464427351951599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2854829728603363, "epoch": 4.88, "learning_rate": 1.5137060741456347e-05, "loss": 0.314, "step": 5774, "task_loss": 0.2767089903354645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.178146094083786, "epoch": 4.88, "learning_rate": 1.5131022823330516e-05, "loss": 0.2523, "step": 5775, "task_loss": 0.5600648522377014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3082842528820038, "epoch": 4.88, "learning_rate": 1.5124984905204686e-05, "loss": 0.3172, "step": 5776, "task_loss": 0.9536290764808655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36290687322616577, "epoch": 4.88, "learning_rate": 1.5118946987078855e-05, "loss": 0.3711, "step": 5777, "task_loss": 0.4570763111114502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4551832377910614, "epoch": 4.88, "learning_rate": 1.5112909068953027e-05, "loss": 0.3836, "step": 5778, "task_loss": 1.2717946767807007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28539013862609863, "epoch": 4.88, "learning_rate": 1.5106871150827196e-05, "loss": 0.3181, "step": 5779, "task_loss": 0.7153587937355042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3291042149066925, "epoch": 4.89, "learning_rate": 1.5100833232701363e-05, "loss": 0.2887, "step": 5780, "task_loss": 0.9556723237037659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37072819471359253, "epoch": 4.89, "learning_rate": 1.5094795314575536e-05, "loss": 0.3301, "step": 5781, "task_loss": 1.0332320928573608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25087928771972656, "epoch": 4.89, "learning_rate": 1.5088757396449705e-05, "loss": 0.3582, "step": 5782, "task_loss": 0.34444648027420044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42371514439582825, "epoch": 4.89, "learning_rate": 1.5082719478323875e-05, "loss": 0.4335, "step": 5783, "task_loss": 0.5069558620452881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35727912187576294, "epoch": 4.89, "learning_rate": 1.5076681560198044e-05, "loss": 0.3089, "step": 5784, "task_loss": 0.9092493057250977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3821491599082947, "epoch": 4.89, "learning_rate": 1.5070643642072213e-05, "loss": 0.3129, "step": 5785, "task_loss": 1.0388296842575073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34463685750961304, "epoch": 4.89, "learning_rate": 1.5064605723946385e-05, "loss": 0.3649, "step": 5786, "task_loss": 0.46713319420814514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7028311491012573, "epoch": 4.89, "learning_rate": 1.5058567805820554e-05, "loss": 0.4577, "step": 5787, "task_loss": 0.37677693367004395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3167526125907898, "epoch": 4.89, "learning_rate": 1.5052529887694725e-05, "loss": 0.4151, "step": 5788, "task_loss": 0.6489503383636475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3105502426624298, "epoch": 4.89, "learning_rate": 1.5046491969568893e-05, "loss": 0.3461, "step": 5789, "task_loss": 0.48992058634757996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27412542700767517, "epoch": 4.89, "learning_rate": 1.5040454051443062e-05, "loss": 0.3705, "step": 5790, "task_loss": 0.7261337041854858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24793162941932678, "epoch": 4.89, "learning_rate": 1.5034416133317233e-05, "loss": 0.2587, "step": 5791, "task_loss": 0.2559437155723572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2129763960838318, "epoch": 4.9, "learning_rate": 1.5028378215191402e-05, "loss": 0.2446, "step": 5792, "task_loss": 0.1779426634311676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24134224653244019, "epoch": 4.9, "learning_rate": 1.5022340297065574e-05, "loss": 0.4287, "step": 5793, "task_loss": 1.0253782272338867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4289560914039612, "epoch": 4.9, "learning_rate": 1.5016302378939743e-05, "loss": 0.3363, "step": 5794, "task_loss": 0.92864990234375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5308049917221069, "epoch": 4.9, "learning_rate": 1.501026446081391e-05, "loss": 0.3851, "step": 5795, "task_loss": 0.23604631423950195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3007281422615051, "epoch": 4.9, "learning_rate": 1.5004226542688082e-05, "loss": 0.3339, "step": 5796, "task_loss": 0.3478909432888031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6247969269752502, "epoch": 4.9, "learning_rate": 1.4998188624562251e-05, "loss": 0.4074, "step": 5797, "task_loss": 0.7697858214378357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31335511803627014, "epoch": 4.9, "learning_rate": 1.4992150706436422e-05, "loss": 0.4023, "step": 5798, "task_loss": 0.9520833492279053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27765029668807983, "epoch": 4.9, "learning_rate": 1.498611278831059e-05, "loss": 0.3703, "step": 5799, "task_loss": 0.47706156969070435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30171436071395874, "epoch": 4.9, "learning_rate": 1.498007487018476e-05, "loss": 0.3972, "step": 5800, "task_loss": 0.8631951212882996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.340791255235672, "epoch": 4.9, "learning_rate": 1.4974036952058932e-05, "loss": 0.374, "step": 5801, "task_loss": 1.0514858961105347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5358536243438721, "epoch": 4.9, "learning_rate": 1.49679990339331e-05, "loss": 0.3835, "step": 5802, "task_loss": 0.9370108842849731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17474089562892914, "epoch": 4.9, "learning_rate": 1.4961961115807271e-05, "loss": 0.2971, "step": 5803, "task_loss": 0.1834273636341095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3145199418067932, "epoch": 4.91, "learning_rate": 1.495592319768144e-05, "loss": 0.4316, "step": 5804, "task_loss": 0.7042176127433777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.451484739780426, "epoch": 4.91, "learning_rate": 1.4949885279555609e-05, "loss": 0.3728, "step": 5805, "task_loss": 0.36936965584754944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24084845185279846, "epoch": 4.91, "learning_rate": 1.494384736142978e-05, "loss": 0.314, "step": 5806, "task_loss": 0.6175845265388489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2297341525554657, "epoch": 4.91, "learning_rate": 1.4937809443303948e-05, "loss": 0.2869, "step": 5807, "task_loss": 0.44039222598075867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21994926035404205, "epoch": 4.91, "learning_rate": 1.493177152517812e-05, "loss": 0.4875, "step": 5808, "task_loss": 0.24240060150623322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4111299514770508, "epoch": 4.91, "learning_rate": 1.492573360705229e-05, "loss": 0.3242, "step": 5809, "task_loss": 0.7994420528411865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32927125692367554, "epoch": 4.91, "learning_rate": 1.4919695688926458e-05, "loss": 0.3854, "step": 5810, "task_loss": 1.016492247581482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2565951943397522, "epoch": 4.91, "learning_rate": 1.4913657770800629e-05, "loss": 0.3383, "step": 5811, "task_loss": 0.533898651599884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28024256229400635, "epoch": 4.91, "learning_rate": 1.4907619852674798e-05, "loss": 0.3732, "step": 5812, "task_loss": 0.3810567259788513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2065892070531845, "epoch": 4.91, "learning_rate": 1.4901581934548968e-05, "loss": 0.2884, "step": 5813, "task_loss": 0.5407649278640747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38429582118988037, "epoch": 4.91, "learning_rate": 1.4895544016423137e-05, "loss": 0.3802, "step": 5814, "task_loss": 0.5641566514968872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21907727420330048, "epoch": 4.91, "learning_rate": 1.4889506098297306e-05, "loss": 0.3255, "step": 5815, "task_loss": 0.9284094572067261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32308435440063477, "epoch": 4.92, "learning_rate": 1.4883468180171479e-05, "loss": 0.324, "step": 5816, "task_loss": 0.6150799989700317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20575033128261566, "epoch": 4.92, "learning_rate": 1.4877430262045647e-05, "loss": 0.2311, "step": 5817, "task_loss": 0.5614393949508667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28052324056625366, "epoch": 4.92, "learning_rate": 1.4871392343919818e-05, "loss": 0.3678, "step": 5818, "task_loss": 0.35254397988319397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3812541961669922, "epoch": 4.92, "learning_rate": 1.4865354425793987e-05, "loss": 0.3449, "step": 5819, "task_loss": 0.04606306925415993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23351667821407318, "epoch": 4.92, "learning_rate": 1.4859316507668156e-05, "loss": 0.3769, "step": 5820, "task_loss": 0.43359678983688354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46794193983078003, "epoch": 4.92, "learning_rate": 1.4853278589542326e-05, "loss": 0.3712, "step": 5821, "task_loss": 0.7862229943275452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3447559177875519, "epoch": 4.92, "learning_rate": 1.4847240671416495e-05, "loss": 0.4262, "step": 5822, "task_loss": 0.8496137857437134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.549251914024353, "epoch": 4.92, "learning_rate": 1.4841202753290667e-05, "loss": 0.375, "step": 5823, "task_loss": 1.0407413244247437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48229068517684937, "epoch": 4.92, "learning_rate": 1.4835164835164836e-05, "loss": 0.4013, "step": 5824, "task_loss": 0.6871600151062012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38175806403160095, "epoch": 4.92, "learning_rate": 1.4829126917039005e-05, "loss": 0.2927, "step": 5825, "task_loss": 0.3714204728603363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5946881175041199, "epoch": 4.92, "learning_rate": 1.4823088998913176e-05, "loss": 0.4033, "step": 5826, "task_loss": 1.0226919651031494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2637023627758026, "epoch": 4.93, "learning_rate": 1.4817051080787345e-05, "loss": 0.373, "step": 5827, "task_loss": 0.40031465888023376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15191248059272766, "epoch": 4.93, "learning_rate": 1.4811013162661517e-05, "loss": 0.2426, "step": 5828, "task_loss": 0.11540481448173523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2369057685136795, "epoch": 4.93, "learning_rate": 1.4804975244535684e-05, "loss": 0.4717, "step": 5829, "task_loss": 1.0650098323822021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43662554025650024, "epoch": 4.93, "learning_rate": 1.4798937326409853e-05, "loss": 0.3438, "step": 5830, "task_loss": 0.17688752710819244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4494057297706604, "epoch": 4.93, "learning_rate": 1.4792899408284025e-05, "loss": 0.4138, "step": 5831, "task_loss": 0.6536838412284851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43894249200820923, "epoch": 4.93, "learning_rate": 1.4786861490158194e-05, "loss": 0.3101, "step": 5832, "task_loss": 0.7031422257423401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4073083996772766, "epoch": 4.93, "learning_rate": 1.4780823572032365e-05, "loss": 0.3701, "step": 5833, "task_loss": 0.3793613612651825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2642236351966858, "epoch": 4.93, "learning_rate": 1.4774785653906534e-05, "loss": 0.2813, "step": 5834, "task_loss": 0.23465730249881744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5085819959640503, "epoch": 4.93, "learning_rate": 1.4768747735780702e-05, "loss": 0.4208, "step": 5835, "task_loss": 0.7538548111915588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4143712818622589, "epoch": 4.93, "learning_rate": 1.4762709817654875e-05, "loss": 0.3584, "step": 5836, "task_loss": 1.1802188158035278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22928208112716675, "epoch": 4.93, "learning_rate": 1.4756671899529042e-05, "loss": 0.3168, "step": 5837, "task_loss": 0.3903515040874481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48906341195106506, "epoch": 4.93, "learning_rate": 1.4750633981403214e-05, "loss": 0.3809, "step": 5838, "task_loss": 1.0570697784423828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2531161606311798, "epoch": 4.94, "learning_rate": 1.4744596063277383e-05, "loss": 0.3672, "step": 5839, "task_loss": 0.2967401444911957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20218782126903534, "epoch": 4.94, "learning_rate": 1.4738558145151552e-05, "loss": 0.284, "step": 5840, "task_loss": 0.09052062779664993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2905005216598511, "epoch": 4.94, "learning_rate": 1.4732520227025722e-05, "loss": 0.3192, "step": 5841, "task_loss": 0.3320857286453247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.384685754776001, "epoch": 4.94, "learning_rate": 1.4726482308899891e-05, "loss": 0.3703, "step": 5842, "task_loss": 0.21070057153701782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.509270966053009, "epoch": 4.94, "learning_rate": 1.4720444390774064e-05, "loss": 0.474, "step": 5843, "task_loss": 0.4888785481452942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7324286103248596, "epoch": 4.94, "learning_rate": 1.4714406472648232e-05, "loss": 0.5086, "step": 5844, "task_loss": 1.118077039718628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21765968203544617, "epoch": 4.94, "learning_rate": 1.47083685545224e-05, "loss": 0.2768, "step": 5845, "task_loss": 0.059942059218883514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31433817744255066, "epoch": 4.94, "learning_rate": 1.4702330636396572e-05, "loss": 0.3121, "step": 5846, "task_loss": 0.5278925895690918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3129268288612366, "epoch": 4.94, "learning_rate": 1.469629271827074e-05, "loss": 0.3685, "step": 5847, "task_loss": 0.10666128247976303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44666609168052673, "epoch": 4.94, "learning_rate": 1.4690254800144911e-05, "loss": 0.3578, "step": 5848, "task_loss": 1.2682123184204102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3532066345214844, "epoch": 4.94, "learning_rate": 1.468421688201908e-05, "loss": 0.289, "step": 5849, "task_loss": 0.20214307308197021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32623496651649475, "epoch": 4.94, "learning_rate": 1.4678178963893249e-05, "loss": 0.3714, "step": 5850, "task_loss": 1.2119863033294678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31595832109451294, "epoch": 4.95, "learning_rate": 1.4672141045767421e-05, "loss": 0.3761, "step": 5851, "task_loss": 0.2850269377231598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21521875262260437, "epoch": 4.95, "learning_rate": 1.466610312764159e-05, "loss": 0.4858, "step": 5852, "task_loss": 0.6277264952659607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4299688935279846, "epoch": 4.95, "learning_rate": 1.466006520951576e-05, "loss": 0.336, "step": 5853, "task_loss": 0.3977147340774536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21007093787193298, "epoch": 4.95, "learning_rate": 1.465402729138993e-05, "loss": 0.3852, "step": 5854, "task_loss": 0.6737109422683716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24773570895195007, "epoch": 4.95, "learning_rate": 1.4647989373264099e-05, "loss": 0.5063, "step": 5855, "task_loss": 0.4353468716144562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36366164684295654, "epoch": 4.95, "learning_rate": 1.464195145513827e-05, "loss": 0.3667, "step": 5856, "task_loss": 0.18685980141162872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3233502507209778, "epoch": 4.95, "learning_rate": 1.4635913537012438e-05, "loss": 0.3358, "step": 5857, "task_loss": 0.5083929896354675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3861609697341919, "epoch": 4.95, "learning_rate": 1.462987561888661e-05, "loss": 0.4016, "step": 5858, "task_loss": 1.006967306137085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1779721975326538, "epoch": 4.95, "learning_rate": 1.462383770076078e-05, "loss": 0.423, "step": 5859, "task_loss": 0.4814084768295288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2271987795829773, "epoch": 4.95, "learning_rate": 1.4617799782634946e-05, "loss": 0.2551, "step": 5860, "task_loss": 0.204209566116333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2690074145793915, "epoch": 4.95, "learning_rate": 1.4611761864509119e-05, "loss": 0.446, "step": 5861, "task_loss": 0.6276437044143677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21927416324615479, "epoch": 4.95, "learning_rate": 1.4605723946383288e-05, "loss": 0.3743, "step": 5862, "task_loss": 0.050760071724653244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27236461639404297, "epoch": 4.96, "learning_rate": 1.4599686028257458e-05, "loss": 0.3681, "step": 5863, "task_loss": 0.238896906375885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1995597928762436, "epoch": 4.96, "learning_rate": 1.4593648110131627e-05, "loss": 0.2844, "step": 5864, "task_loss": 0.25535333156585693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28272342681884766, "epoch": 4.96, "learning_rate": 1.4587610192005796e-05, "loss": 0.4672, "step": 5865, "task_loss": 1.005971074104309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36807405948638916, "epoch": 4.96, "learning_rate": 1.4581572273879968e-05, "loss": 0.3429, "step": 5866, "task_loss": 0.4633117914199829 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2992868423461914, "epoch": 4.96, "learning_rate": 1.4575534355754137e-05, "loss": 0.3711, "step": 5867, "task_loss": 1.077210783958435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32307180762290955, "epoch": 4.96, "learning_rate": 1.4569496437628308e-05, "loss": 0.4429, "step": 5868, "task_loss": 0.6767479181289673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4963821768760681, "epoch": 4.96, "learning_rate": 1.4563458519502476e-05, "loss": 0.4898, "step": 5869, "task_loss": 1.1453402042388916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3553451597690582, "epoch": 4.96, "learning_rate": 1.4557420601376645e-05, "loss": 0.3406, "step": 5870, "task_loss": 0.9828073382377625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37172794342041016, "epoch": 4.96, "learning_rate": 1.4551382683250816e-05, "loss": 0.3025, "step": 5871, "task_loss": 0.10991734266281128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20119109749794006, "epoch": 4.96, "learning_rate": 1.4545344765124985e-05, "loss": 0.3584, "step": 5872, "task_loss": 0.398488849401474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36124956607818604, "epoch": 4.96, "learning_rate": 1.4539306846999157e-05, "loss": 0.2808, "step": 5873, "task_loss": 0.16409775614738464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18791192770004272, "epoch": 4.96, "learning_rate": 1.4533268928873326e-05, "loss": 0.2882, "step": 5874, "task_loss": 0.3119623363018036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28590279817581177, "epoch": 4.97, "learning_rate": 1.4527231010747495e-05, "loss": 0.3712, "step": 5875, "task_loss": 0.921726405620575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4305531680583954, "epoch": 4.97, "learning_rate": 1.4521193092621665e-05, "loss": 0.3607, "step": 5876, "task_loss": 0.22375677525997162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20060525834560394, "epoch": 4.97, "learning_rate": 1.4515155174495834e-05, "loss": 0.3873, "step": 5877, "task_loss": 0.7506094574928284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19619636237621307, "epoch": 4.97, "learning_rate": 1.4509117256370005e-05, "loss": 0.4543, "step": 5878, "task_loss": 0.48271042108535767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5104503631591797, "epoch": 4.97, "learning_rate": 1.4503079338244174e-05, "loss": 0.5459, "step": 5879, "task_loss": 0.7819879055023193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1778479367494583, "epoch": 4.97, "learning_rate": 1.4497041420118343e-05, "loss": 0.2677, "step": 5880, "task_loss": 0.3519758880138397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3915883004665375, "epoch": 4.97, "learning_rate": 1.4491003501992515e-05, "loss": 0.411, "step": 5881, "task_loss": 1.2754688262939453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48809391260147095, "epoch": 4.97, "learning_rate": 1.4484965583866684e-05, "loss": 0.4061, "step": 5882, "task_loss": 0.5174089074134827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.383968710899353, "epoch": 4.97, "learning_rate": 1.4478927665740854e-05, "loss": 0.3386, "step": 5883, "task_loss": 0.7228907346725464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33404237031936646, "epoch": 4.97, "learning_rate": 1.4472889747615023e-05, "loss": 0.3453, "step": 5884, "task_loss": 0.2642241418361664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.447514146566391, "epoch": 4.97, "learning_rate": 1.4466851829489192e-05, "loss": 0.3427, "step": 5885, "task_loss": 0.5660551190376282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3780747652053833, "epoch": 4.97, "learning_rate": 1.4460813911363363e-05, "loss": 0.3078, "step": 5886, "task_loss": 0.5333179235458374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18134984374046326, "epoch": 4.98, "learning_rate": 1.4454775993237531e-05, "loss": 0.2308, "step": 5887, "task_loss": 0.12143918126821518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4515698552131653, "epoch": 4.98, "learning_rate": 1.4448738075111704e-05, "loss": 0.3486, "step": 5888, "task_loss": 0.8351098895072937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2011580467224121, "epoch": 4.98, "learning_rate": 1.4442700156985873e-05, "loss": 0.2332, "step": 5889, "task_loss": 0.19910338521003723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3997766077518463, "epoch": 4.98, "learning_rate": 1.4436662238860041e-05, "loss": 0.3609, "step": 5890, "task_loss": 0.8600060343742371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.10943904519081116, "epoch": 4.98, "learning_rate": 1.4430624320734212e-05, "loss": 0.2661, "step": 5891, "task_loss": 0.29554417729377747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3102133870124817, "epoch": 4.98, "learning_rate": 1.4424586402608381e-05, "loss": 0.4298, "step": 5892, "task_loss": 0.3463926315307617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2914971113204956, "epoch": 4.98, "learning_rate": 1.4418548484482553e-05, "loss": 0.3022, "step": 5893, "task_loss": 0.1580694615840912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4822992980480194, "epoch": 4.98, "learning_rate": 1.441251056635672e-05, "loss": 0.4073, "step": 5894, "task_loss": 1.1465704441070557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3735610246658325, "epoch": 4.98, "learning_rate": 1.440647264823089e-05, "loss": 0.3748, "step": 5895, "task_loss": 0.7660421133041382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3022769093513489, "epoch": 4.98, "learning_rate": 1.4400434730105062e-05, "loss": 0.3388, "step": 5896, "task_loss": 0.391436904668808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31119710206985474, "epoch": 4.98, "learning_rate": 1.439439681197923e-05, "loss": 0.3874, "step": 5897, "task_loss": 0.5027901530265808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22612302005290985, "epoch": 4.99, "learning_rate": 1.4388358893853401e-05, "loss": 0.2143, "step": 5898, "task_loss": 0.25177913904190063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2847215533256531, "epoch": 4.99, "learning_rate": 1.438232097572757e-05, "loss": 0.2887, "step": 5899, "task_loss": 0.5423940420150757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.501730740070343, "epoch": 4.99, "learning_rate": 1.4376283057601739e-05, "loss": 0.3877, "step": 5900, "task_loss": 0.7700178623199463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2557544410228729, "epoch": 4.99, "learning_rate": 1.4370245139475911e-05, "loss": 0.2808, "step": 5901, "task_loss": 0.4912527799606323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.418819785118103, "epoch": 4.99, "learning_rate": 1.4364207221350078e-05, "loss": 0.3578, "step": 5902, "task_loss": 1.117626667022705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.546571671962738, "epoch": 4.99, "learning_rate": 1.4358169303224247e-05, "loss": 0.4393, "step": 5903, "task_loss": 0.22599491477012634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34151923656463623, "epoch": 4.99, "learning_rate": 1.435213138509842e-05, "loss": 0.3591, "step": 5904, "task_loss": 0.42458146810531616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41835081577301025, "epoch": 4.99, "learning_rate": 1.4346093466972588e-05, "loss": 0.403, "step": 5905, "task_loss": 0.6121962070465088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3155463933944702, "epoch": 4.99, "learning_rate": 1.4340055548846759e-05, "loss": 0.3219, "step": 5906, "task_loss": 0.46473854780197144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4825191795825958, "epoch": 4.99, "learning_rate": 1.4334017630720928e-05, "loss": 0.4239, "step": 5907, "task_loss": 0.7308828234672546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40464967489242554, "epoch": 4.99, "learning_rate": 1.4327979712595097e-05, "loss": 0.3577, "step": 5908, "task_loss": 0.5008689761161804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27334678173065186, "epoch": 4.99, "learning_rate": 1.4321941794469269e-05, "loss": 0.4419, "step": 5909, "task_loss": 1.24210786819458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3438184857368469, "epoch": 5.0, "learning_rate": 1.4315903876343436e-05, "loss": 0.3567, "step": 5910, "task_loss": 0.5007885694503784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3580932021141052, "epoch": 5.0, "learning_rate": 1.4309865958217608e-05, "loss": 0.318, "step": 5911, "task_loss": 0.9190899133682251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29744842648506165, "epoch": 5.0, "learning_rate": 1.4303828040091777e-05, "loss": 0.3964, "step": 5912, "task_loss": 0.20468881726264954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4211721122264862, "epoch": 5.0, "learning_rate": 1.4297790121965946e-05, "loss": 0.3651, "step": 5913, "task_loss": 0.29583999514579773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37054184079170227, "epoch": 5.0, "learning_rate": 1.4291752203840117e-05, "loss": 0.3515, "step": 5914, "task_loss": 0.545978307723999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2421240210533142, "epoch": 5.0, "learning_rate": 1.4285714285714285e-05, "loss": 0.3623, "step": 5915, "task_loss": 0.3686506152153015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3706384003162384, "epoch": 5.0, "learning_rate": 1.4279676367588458e-05, "loss": 0.6466, "step": 5916, "task_loss": 0.5570895671844482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3538556694984436, "epoch": 5.0, "learning_rate": 1.4273638449462627e-05, "loss": 0.3283, "step": 5917, "task_loss": 0.5490244030952454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28370922803878784, "epoch": 5.0, "learning_rate": 1.4267600531336794e-05, "loss": 0.3777, "step": 5918, "task_loss": 0.8918646574020386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18881550431251526, "epoch": 5.0, "learning_rate": 1.4261562613210966e-05, "loss": 0.3037, "step": 5919, "task_loss": 0.22657032310962677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5303105115890503, "epoch": 5.0, "learning_rate": 1.4255524695085135e-05, "loss": 0.4098, "step": 5920, "task_loss": 0.581682026386261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5380563735961914, "epoch": 5.01, "learning_rate": 1.4249486776959305e-05, "loss": 0.3786, "step": 5921, "task_loss": 0.37221163511276245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34240931272506714, "epoch": 5.01, "learning_rate": 1.4243448858833474e-05, "loss": 0.3686, "step": 5922, "task_loss": 0.624137282371521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4165332317352295, "epoch": 5.01, "learning_rate": 1.4237410940707643e-05, "loss": 0.4132, "step": 5923, "task_loss": 0.5159457921981812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29492777585983276, "epoch": 5.01, "learning_rate": 1.4231373022581815e-05, "loss": 0.3242, "step": 5924, "task_loss": 0.398601233959198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3139212727546692, "epoch": 5.01, "learning_rate": 1.4225335104455983e-05, "loss": 0.2897, "step": 5925, "task_loss": 0.38379931449890137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1421220451593399, "epoch": 5.01, "learning_rate": 1.4219297186330155e-05, "loss": 0.2819, "step": 5926, "task_loss": 0.8855472207069397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4289281964302063, "epoch": 5.01, "learning_rate": 1.4213259268204324e-05, "loss": 0.356, "step": 5927, "task_loss": 1.2452096939086914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28172966837882996, "epoch": 5.01, "learning_rate": 1.4207221350078493e-05, "loss": 0.3236, "step": 5928, "task_loss": 0.07368165254592896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3361627459526062, "epoch": 5.01, "learning_rate": 1.4201183431952663e-05, "loss": 0.2836, "step": 5929, "task_loss": 0.18780970573425293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.383739173412323, "epoch": 5.01, "learning_rate": 1.4195145513826832e-05, "loss": 0.3831, "step": 5930, "task_loss": 0.24397379159927368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3558858036994934, "epoch": 5.01, "learning_rate": 1.4189107595701004e-05, "loss": 0.3981, "step": 5931, "task_loss": 1.4189794063568115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28955015540122986, "epoch": 5.01, "learning_rate": 1.4183069677575173e-05, "loss": 0.3319, "step": 5932, "task_loss": 0.5286428928375244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28802797198295593, "epoch": 5.02, "learning_rate": 1.417703175944934e-05, "loss": 0.2886, "step": 5933, "task_loss": 1.0640721321105957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17807379364967346, "epoch": 5.02, "learning_rate": 1.4170993841323513e-05, "loss": 0.2935, "step": 5934, "task_loss": 0.3359358310699463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29936978220939636, "epoch": 5.02, "learning_rate": 1.4164955923197682e-05, "loss": 0.257, "step": 5935, "task_loss": 0.3498486578464508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30149123072624207, "epoch": 5.02, "learning_rate": 1.4158918005071852e-05, "loss": 0.3778, "step": 5936, "task_loss": 0.14011149108409882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2930336892604828, "epoch": 5.02, "learning_rate": 1.4152880086946021e-05, "loss": 0.3513, "step": 5937, "task_loss": 0.6275894641876221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28211724758148193, "epoch": 5.02, "learning_rate": 1.414684216882019e-05, "loss": 0.3465, "step": 5938, "task_loss": 0.3610455393791199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37129300832748413, "epoch": 5.02, "learning_rate": 1.4140804250694362e-05, "loss": 0.3621, "step": 5939, "task_loss": 1.244894027709961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.502269983291626, "epoch": 5.02, "learning_rate": 1.4134766332568531e-05, "loss": 0.3622, "step": 5940, "task_loss": 0.5792396664619446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3231351375579834, "epoch": 5.02, "learning_rate": 1.4128728414442702e-05, "loss": 0.3467, "step": 5941, "task_loss": 1.1771519184112549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4357582926750183, "epoch": 5.02, "learning_rate": 1.412269049631687e-05, "loss": 0.3667, "step": 5942, "task_loss": 0.4470100402832031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22493740916252136, "epoch": 5.02, "learning_rate": 1.411665257819104e-05, "loss": 0.3664, "step": 5943, "task_loss": 0.6445683240890503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44275662302970886, "epoch": 5.02, "learning_rate": 1.411061466006521e-05, "loss": 0.4157, "step": 5944, "task_loss": 0.8490671515464783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.377575546503067, "epoch": 5.03, "learning_rate": 1.4104576741939379e-05, "loss": 0.3628, "step": 5945, "task_loss": 0.6825645565986633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31701594591140747, "epoch": 5.03, "learning_rate": 1.4098538823813551e-05, "loss": 0.2359, "step": 5946, "task_loss": 0.10369012504816055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3601973056793213, "epoch": 5.03, "learning_rate": 1.409250090568772e-05, "loss": 0.3794, "step": 5947, "task_loss": 0.6949377059936523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25246816873550415, "epoch": 5.03, "learning_rate": 1.4086462987561889e-05, "loss": 0.3706, "step": 5948, "task_loss": 1.2679619789123535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27683940529823303, "epoch": 5.03, "learning_rate": 1.408042506943606e-05, "loss": 0.3682, "step": 5949, "task_loss": 0.16811788082122803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32092130184173584, "epoch": 5.03, "learning_rate": 1.4074387151310228e-05, "loss": 0.3008, "step": 5950, "task_loss": 0.4299968183040619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23090168833732605, "epoch": 5.03, "learning_rate": 1.4068349233184399e-05, "loss": 0.3025, "step": 5951, "task_loss": 0.6984795928001404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4441968500614166, "epoch": 5.03, "learning_rate": 1.4062311315058568e-05, "loss": 0.3621, "step": 5952, "task_loss": 0.39340880513191223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34749749302864075, "epoch": 5.03, "learning_rate": 1.4056273396932737e-05, "loss": 0.3721, "step": 5953, "task_loss": 0.8271662592887878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2625184655189514, "epoch": 5.03, "learning_rate": 1.4050235478806909e-05, "loss": 0.2858, "step": 5954, "task_loss": 0.1434766799211502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5984960794448853, "epoch": 5.03, "learning_rate": 1.4044197560681078e-05, "loss": 0.4439, "step": 5955, "task_loss": 0.9515314102172852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3684883415699005, "epoch": 5.03, "learning_rate": 1.4038159642555248e-05, "loss": 0.314, "step": 5956, "task_loss": 0.611068606376648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16631367802619934, "epoch": 5.04, "learning_rate": 1.4032121724429417e-05, "loss": 0.2339, "step": 5957, "task_loss": 0.26790156960487366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33611637353897095, "epoch": 5.04, "learning_rate": 1.4026083806303586e-05, "loss": 0.4066, "step": 5958, "task_loss": 0.4870677888393402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26384735107421875, "epoch": 5.04, "learning_rate": 1.4020045888177757e-05, "loss": 0.3219, "step": 5959, "task_loss": 1.2805792093276978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23833321034908295, "epoch": 5.04, "learning_rate": 1.4014007970051926e-05, "loss": 0.2999, "step": 5960, "task_loss": 0.1874120533466339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2271578013896942, "epoch": 5.04, "learning_rate": 1.4007970051926098e-05, "loss": 0.3049, "step": 5961, "task_loss": 0.11443410068750381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5331374406814575, "epoch": 5.04, "learning_rate": 1.4001932133800267e-05, "loss": 0.3553, "step": 5962, "task_loss": 0.6140674352645874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.402751624584198, "epoch": 5.04, "learning_rate": 1.3995894215674436e-05, "loss": 0.379, "step": 5963, "task_loss": 1.256068468093872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3077881336212158, "epoch": 5.04, "learning_rate": 1.3989856297548606e-05, "loss": 0.3358, "step": 5964, "task_loss": 0.5528014302253723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3071507513523102, "epoch": 5.04, "learning_rate": 1.3983818379422775e-05, "loss": 0.3579, "step": 5965, "task_loss": 1.1920123100280762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.459580659866333, "epoch": 5.04, "learning_rate": 1.3977780461296947e-05, "loss": 0.4203, "step": 5966, "task_loss": 1.1262434720993042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2725825309753418, "epoch": 5.04, "learning_rate": 1.3971742543171114e-05, "loss": 0.357, "step": 5967, "task_loss": 0.5748112797737122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.538134753704071, "epoch": 5.04, "learning_rate": 1.3965704625045283e-05, "loss": 0.3929, "step": 5968, "task_loss": 0.8232698440551758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3935888409614563, "epoch": 5.05, "learning_rate": 1.3959666706919456e-05, "loss": 0.4405, "step": 5969, "task_loss": 1.390718936920166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2945594787597656, "epoch": 5.05, "learning_rate": 1.3953628788793624e-05, "loss": 0.4593, "step": 5970, "task_loss": 0.0927414521574974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4595268964767456, "epoch": 5.05, "learning_rate": 1.3947590870667795e-05, "loss": 0.3294, "step": 5971, "task_loss": 0.9597863554954529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4040312170982361, "epoch": 5.05, "learning_rate": 1.3941552952541964e-05, "loss": 0.3747, "step": 5972, "task_loss": 0.8186459541320801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3353765308856964, "epoch": 5.05, "learning_rate": 1.3935515034416133e-05, "loss": 0.3014, "step": 5973, "task_loss": 0.439534991979599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23175014555454254, "epoch": 5.05, "learning_rate": 1.3929477116290305e-05, "loss": 0.3783, "step": 5974, "task_loss": 0.585415780544281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3107420802116394, "epoch": 5.05, "learning_rate": 1.3923439198164472e-05, "loss": 0.267, "step": 5975, "task_loss": 0.475877583026886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36519739031791687, "epoch": 5.05, "learning_rate": 1.3917401280038645e-05, "loss": 0.3331, "step": 5976, "task_loss": 1.1611772775650024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27431532740592957, "epoch": 5.05, "learning_rate": 1.3911363361912813e-05, "loss": 0.3508, "step": 5977, "task_loss": 0.26338106393814087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5212816596031189, "epoch": 5.05, "learning_rate": 1.3905325443786982e-05, "loss": 0.438, "step": 5978, "task_loss": 0.11644130945205688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2777765393257141, "epoch": 5.05, "learning_rate": 1.3899287525661153e-05, "loss": 0.2892, "step": 5979, "task_loss": 0.11110787838697433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1888619214296341, "epoch": 5.05, "learning_rate": 1.3893249607535322e-05, "loss": 0.309, "step": 5980, "task_loss": 0.13800092041492462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3606197237968445, "epoch": 5.06, "learning_rate": 1.3887211689409494e-05, "loss": 0.3775, "step": 5981, "task_loss": 0.9902743697166443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4760954976081848, "epoch": 5.06, "learning_rate": 1.3881173771283663e-05, "loss": 0.4558, "step": 5982, "task_loss": 1.215860366821289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43665310740470886, "epoch": 5.06, "learning_rate": 1.387513585315783e-05, "loss": 0.3756, "step": 5983, "task_loss": 0.37888702750205994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4715481400489807, "epoch": 5.06, "learning_rate": 1.3869097935032002e-05, "loss": 0.4884, "step": 5984, "task_loss": 0.9694283604621887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.537246584892273, "epoch": 5.06, "learning_rate": 1.3863060016906171e-05, "loss": 0.3475, "step": 5985, "task_loss": 1.3188785314559937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2243865728378296, "epoch": 5.06, "learning_rate": 1.3857022098780342e-05, "loss": 0.2959, "step": 5986, "task_loss": 0.8647588491439819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44447144865989685, "epoch": 5.06, "learning_rate": 1.385098418065451e-05, "loss": 0.3454, "step": 5987, "task_loss": 1.7188267707824707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2622881531715393, "epoch": 5.06, "learning_rate": 1.384494626252868e-05, "loss": 0.3187, "step": 5988, "task_loss": 0.17886987328529358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27088797092437744, "epoch": 5.06, "learning_rate": 1.3838908344402852e-05, "loss": 0.3063, "step": 5989, "task_loss": 0.9491754770278931 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2618738114833832, "epoch": 5.06, "learning_rate": 1.3832870426277019e-05, "loss": 0.337, "step": 5990, "task_loss": 0.5062279105186462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3142494857311249, "epoch": 5.06, "learning_rate": 1.3826832508151191e-05, "loss": 0.3703, "step": 5991, "task_loss": 0.9868240356445312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3830317258834839, "epoch": 5.07, "learning_rate": 1.382079459002536e-05, "loss": 0.378, "step": 5992, "task_loss": 0.15749137103557587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3350278437137604, "epoch": 5.07, "learning_rate": 1.3814756671899529e-05, "loss": 0.3619, "step": 5993, "task_loss": 1.015709638595581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7073035836219788, "epoch": 5.07, "learning_rate": 1.38087187537737e-05, "loss": 0.3978, "step": 5994, "task_loss": 0.8635609149932861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3120648264884949, "epoch": 5.07, "learning_rate": 1.3802680835647868e-05, "loss": 0.3985, "step": 5995, "task_loss": 0.9525788426399231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20073309540748596, "epoch": 5.07, "learning_rate": 1.379664291752204e-05, "loss": 0.3123, "step": 5996, "task_loss": 0.7006676197052002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3624360263347626, "epoch": 5.07, "learning_rate": 1.379060499939621e-05, "loss": 0.3442, "step": 5997, "task_loss": 0.11561544984579086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21260707080364227, "epoch": 5.07, "learning_rate": 1.3784567081270377e-05, "loss": 0.3274, "step": 5998, "task_loss": 0.04991577938199043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34612974524497986, "epoch": 5.07, "learning_rate": 1.3778529163144549e-05, "loss": 0.2624, "step": 5999, "task_loss": 0.7102769613265991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36412757635116577, "epoch": 5.07, "learning_rate": 1.3772491245018718e-05, "loss": 0.4232, "step": 6000, "task_loss": 0.13070160150527954 }, { "epoch": 5.07, "eval_accuracy": 0.916910891089109, "eval_loss": 0.22589653730392456, "eval_runtime": 317.2031, "eval_samples_per_second": 79.602, "eval_steps_per_second": 0.624, "step": 6000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5002893209457397, "epoch": 5.07, "learning_rate": 1.3766453326892888e-05, "loss": 0.3805, "step": 6001, "task_loss": 0.9898810982704163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2689321041107178, "epoch": 5.07, "learning_rate": 1.3760415408767057e-05, "loss": 0.459, "step": 6002, "task_loss": 0.9997177720069885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3375746011734009, "epoch": 5.07, "learning_rate": 1.3754377490641226e-05, "loss": 0.3582, "step": 6003, "task_loss": 0.09780100733041763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3140847384929657, "epoch": 5.08, "learning_rate": 1.3748339572515398e-05, "loss": 0.2641, "step": 6004, "task_loss": 0.8673738837242126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3848434090614319, "epoch": 5.08, "learning_rate": 1.3742301654389567e-05, "loss": 0.3088, "step": 6005, "task_loss": 1.007926344871521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27095329761505127, "epoch": 5.08, "learning_rate": 1.3736263736263738e-05, "loss": 0.2514, "step": 6006, "task_loss": 0.3171061873435974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.306831419467926, "epoch": 5.08, "learning_rate": 1.3730225818137907e-05, "loss": 0.2724, "step": 6007, "task_loss": 1.0966506004333496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26166266202926636, "epoch": 5.08, "learning_rate": 1.3724187900012076e-05, "loss": 0.278, "step": 6008, "task_loss": 0.47443506121635437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31173020601272583, "epoch": 5.08, "learning_rate": 1.3718149981886246e-05, "loss": 0.285, "step": 6009, "task_loss": 0.9566235542297363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18153494596481323, "epoch": 5.08, "learning_rate": 1.3712112063760415e-05, "loss": 0.3828, "step": 6010, "task_loss": 0.15062899887561798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4744820296764374, "epoch": 5.08, "learning_rate": 1.3706074145634587e-05, "loss": 0.403, "step": 6011, "task_loss": 0.6336028575897217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2684153616428375, "epoch": 5.08, "learning_rate": 1.3700036227508756e-05, "loss": 0.3785, "step": 6012, "task_loss": 0.27771079540252686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4523397386074066, "epoch": 5.08, "learning_rate": 1.3693998309382925e-05, "loss": 0.3568, "step": 6013, "task_loss": 1.1592422723770142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3775902986526489, "epoch": 5.08, "learning_rate": 1.3687960391257096e-05, "loss": 0.3144, "step": 6014, "task_loss": 0.11900301277637482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23150646686553955, "epoch": 5.08, "learning_rate": 1.3681922473131265e-05, "loss": 0.2765, "step": 6015, "task_loss": 0.2877809703350067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2824227809906006, "epoch": 5.09, "learning_rate": 1.3675884555005435e-05, "loss": 0.2632, "step": 6016, "task_loss": 0.52577143907547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2667897939682007, "epoch": 5.09, "learning_rate": 1.3669846636879604e-05, "loss": 0.3514, "step": 6017, "task_loss": 0.26074180006980896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3834590017795563, "epoch": 5.09, "learning_rate": 1.3663808718753773e-05, "loss": 0.3564, "step": 6018, "task_loss": 0.86427241563797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23475994169712067, "epoch": 5.09, "learning_rate": 1.3657770800627945e-05, "loss": 0.3509, "step": 6019, "task_loss": 0.03230876475572586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26406651735305786, "epoch": 5.09, "learning_rate": 1.3651732882502114e-05, "loss": 0.3523, "step": 6020, "task_loss": 0.4586513042449951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48319754004478455, "epoch": 5.09, "learning_rate": 1.3645694964376285e-05, "loss": 0.3738, "step": 6021, "task_loss": 1.2178001403808594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22157078981399536, "epoch": 5.09, "learning_rate": 1.3639657046250454e-05, "loss": 0.3729, "step": 6022, "task_loss": 0.6711223721504211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31453561782836914, "epoch": 5.09, "learning_rate": 1.3633619128124622e-05, "loss": 0.3829, "step": 6023, "task_loss": 0.39629027247428894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35527503490448, "epoch": 5.09, "learning_rate": 1.3627581209998793e-05, "loss": 0.3565, "step": 6024, "task_loss": 0.10729299485683441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3819091022014618, "epoch": 5.09, "learning_rate": 1.3621543291872962e-05, "loss": 0.2645, "step": 6025, "task_loss": 0.5103284120559692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24619674682617188, "epoch": 5.09, "learning_rate": 1.3615505373747134e-05, "loss": 0.2336, "step": 6026, "task_loss": 0.14977005124092102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39435112476348877, "epoch": 5.09, "learning_rate": 1.3609467455621303e-05, "loss": 0.3648, "step": 6027, "task_loss": 0.24982237815856934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3808556795120239, "epoch": 5.1, "learning_rate": 1.3603429537495472e-05, "loss": 0.3422, "step": 6028, "task_loss": 0.8836329579353333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3114139139652252, "epoch": 5.1, "learning_rate": 1.3597391619369642e-05, "loss": 0.4125, "step": 6029, "task_loss": 0.449627161026001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3036872446537018, "epoch": 5.1, "learning_rate": 1.3591353701243811e-05, "loss": 0.3388, "step": 6030, "task_loss": 0.7794480323791504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23459120094776154, "epoch": 5.1, "learning_rate": 1.3585315783117984e-05, "loss": 0.3056, "step": 6031, "task_loss": 0.1908271461725235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4366658926010132, "epoch": 5.1, "learning_rate": 1.357927786499215e-05, "loss": 0.4132, "step": 6032, "task_loss": 0.4638592302799225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31534838676452637, "epoch": 5.1, "learning_rate": 1.357323994686632e-05, "loss": 0.3833, "step": 6033, "task_loss": 1.5238125324249268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27436330914497375, "epoch": 5.1, "learning_rate": 1.3567202028740492e-05, "loss": 0.2854, "step": 6034, "task_loss": 0.19820642471313477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3935742974281311, "epoch": 5.1, "learning_rate": 1.356116411061466e-05, "loss": 0.3702, "step": 6035, "task_loss": 0.5797401666641235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2610022723674774, "epoch": 5.1, "learning_rate": 1.3555126192488831e-05, "loss": 0.3682, "step": 6036, "task_loss": 0.17297688126564026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.412901371717453, "epoch": 5.1, "learning_rate": 1.3549088274363e-05, "loss": 0.4345, "step": 6037, "task_loss": 0.6827746033668518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3023720383644104, "epoch": 5.1, "learning_rate": 1.3543050356237169e-05, "loss": 0.3588, "step": 6038, "task_loss": 0.56526780128479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.251968652009964, "epoch": 5.1, "learning_rate": 1.3537012438111341e-05, "loss": 0.3663, "step": 6039, "task_loss": 0.5012864470481873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19557583332061768, "epoch": 5.11, "learning_rate": 1.3530974519985509e-05, "loss": 0.2567, "step": 6040, "task_loss": 0.4516271650791168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3192288875579834, "epoch": 5.11, "learning_rate": 1.352493660185968e-05, "loss": 0.4553, "step": 6041, "task_loss": 0.14560957252979279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5146927237510681, "epoch": 5.11, "learning_rate": 1.351889868373385e-05, "loss": 0.3932, "step": 6042, "task_loss": 0.4534657895565033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3561967611312866, "epoch": 5.11, "learning_rate": 1.3512860765608019e-05, "loss": 0.3411, "step": 6043, "task_loss": 0.6359057426452637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3061889410018921, "epoch": 5.11, "learning_rate": 1.350682284748219e-05, "loss": 0.4112, "step": 6044, "task_loss": 0.9791978597640991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2573489248752594, "epoch": 5.11, "learning_rate": 1.3500784929356358e-05, "loss": 0.4072, "step": 6045, "task_loss": 0.8968371748924255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29881972074508667, "epoch": 5.11, "learning_rate": 1.349474701123053e-05, "loss": 0.3198, "step": 6046, "task_loss": 1.426586389541626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37495672702789307, "epoch": 5.11, "learning_rate": 1.3488709093104697e-05, "loss": 0.3109, "step": 6047, "task_loss": 0.30028173327445984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2645444869995117, "epoch": 5.11, "learning_rate": 1.3482671174978866e-05, "loss": 0.3577, "step": 6048, "task_loss": 0.26711350679397583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3777230381965637, "epoch": 5.11, "learning_rate": 1.3476633256853039e-05, "loss": 0.373, "step": 6049, "task_loss": 0.7839293479919434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26116466522216797, "epoch": 5.11, "learning_rate": 1.3470595338727207e-05, "loss": 0.3391, "step": 6050, "task_loss": 1.3248940706253052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24608328938484192, "epoch": 5.11, "learning_rate": 1.3464557420601378e-05, "loss": 0.3419, "step": 6051, "task_loss": 0.9044433832168579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2520909905433655, "epoch": 5.12, "learning_rate": 1.3458519502475547e-05, "loss": 0.3616, "step": 6052, "task_loss": 0.4953603446483612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35099369287490845, "epoch": 5.12, "learning_rate": 1.3452481584349716e-05, "loss": 0.3022, "step": 6053, "task_loss": 0.7486429214477539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40225398540496826, "epoch": 5.12, "learning_rate": 1.3446443666223888e-05, "loss": 0.285, "step": 6054, "task_loss": 0.7235279083251953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4318292438983917, "epoch": 5.12, "learning_rate": 1.3440405748098055e-05, "loss": 0.459, "step": 6055, "task_loss": 1.4305362701416016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2553286552429199, "epoch": 5.12, "learning_rate": 1.3434367829972228e-05, "loss": 0.3849, "step": 6056, "task_loss": 0.3568253219127655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36302468180656433, "epoch": 5.12, "learning_rate": 1.3428329911846396e-05, "loss": 0.2603, "step": 6057, "task_loss": 0.5302322506904602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24592837691307068, "epoch": 5.12, "learning_rate": 1.3422291993720565e-05, "loss": 0.3456, "step": 6058, "task_loss": 0.6133561730384827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4680793583393097, "epoch": 5.12, "learning_rate": 1.3416254075594736e-05, "loss": 0.3543, "step": 6059, "task_loss": 0.7194826602935791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23530268669128418, "epoch": 5.12, "learning_rate": 1.3410216157468905e-05, "loss": 0.2739, "step": 6060, "task_loss": 0.26583555340766907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34639376401901245, "epoch": 5.12, "learning_rate": 1.3404178239343077e-05, "loss": 0.5156, "step": 6061, "task_loss": 1.8703515529632568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1749170571565628, "epoch": 5.12, "learning_rate": 1.3398140321217246e-05, "loss": 0.3155, "step": 6062, "task_loss": 0.20058798789978027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34104275703430176, "epoch": 5.13, "learning_rate": 1.3392102403091413e-05, "loss": 0.3174, "step": 6063, "task_loss": 0.20887954533100128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3333432078361511, "epoch": 5.13, "learning_rate": 1.3386064484965585e-05, "loss": 0.294, "step": 6064, "task_loss": 0.6893952488899231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39951860904693604, "epoch": 5.13, "learning_rate": 1.3380026566839754e-05, "loss": 0.3157, "step": 6065, "task_loss": 0.3857825994491577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2726556062698364, "epoch": 5.13, "learning_rate": 1.3373988648713925e-05, "loss": 0.3135, "step": 6066, "task_loss": 0.3392603099346161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38896456360816956, "epoch": 5.13, "learning_rate": 1.3367950730588094e-05, "loss": 0.4178, "step": 6067, "task_loss": 1.2980172634124756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2761536240577698, "epoch": 5.13, "learning_rate": 1.3361912812462263e-05, "loss": 0.4155, "step": 6068, "task_loss": 0.3535202145576477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15824556350708008, "epoch": 5.13, "learning_rate": 1.3355874894336435e-05, "loss": 0.3282, "step": 6069, "task_loss": 0.2011546641588211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28939390182495117, "epoch": 5.13, "learning_rate": 1.3349836976210604e-05, "loss": 0.4191, "step": 6070, "task_loss": 0.45122525095939636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44146841764450073, "epoch": 5.13, "learning_rate": 1.3343799058084774e-05, "loss": 0.3779, "step": 6071, "task_loss": 1.7804816961288452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5181756019592285, "epoch": 5.13, "learning_rate": 1.3337761139958943e-05, "loss": 0.4983, "step": 6072, "task_loss": 0.868038535118103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.257127583026886, "epoch": 5.13, "learning_rate": 1.3331723221833112e-05, "loss": 0.3272, "step": 6073, "task_loss": 0.22861061990261078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4188408851623535, "epoch": 5.13, "learning_rate": 1.3325685303707283e-05, "loss": 0.3765, "step": 6074, "task_loss": 0.8788424730300903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3567690849304199, "epoch": 5.14, "learning_rate": 1.3319647385581451e-05, "loss": 0.4098, "step": 6075, "task_loss": 0.5291708707809448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15630601346492767, "epoch": 5.14, "learning_rate": 1.3313609467455624e-05, "loss": 0.3272, "step": 6076, "task_loss": 0.024406004697084427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5124835968017578, "epoch": 5.14, "learning_rate": 1.3307571549329793e-05, "loss": 0.3838, "step": 6077, "task_loss": 1.172258734703064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34036338329315186, "epoch": 5.14, "learning_rate": 1.3301533631203961e-05, "loss": 0.3772, "step": 6078, "task_loss": 1.1852664947509766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39946725964546204, "epoch": 5.14, "learning_rate": 1.3295495713078132e-05, "loss": 0.4129, "step": 6079, "task_loss": 0.30305686593055725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21265669167041779, "epoch": 5.14, "learning_rate": 1.3289457794952301e-05, "loss": 0.3984, "step": 6080, "task_loss": 0.416598916053772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.283516526222229, "epoch": 5.14, "learning_rate": 1.328341987682647e-05, "loss": 0.3288, "step": 6081, "task_loss": 0.14957216382026672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18156005442142487, "epoch": 5.14, "learning_rate": 1.327738195870064e-05, "loss": 0.3618, "step": 6082, "task_loss": 0.8290088176727295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2921946346759796, "epoch": 5.14, "learning_rate": 1.327134404057481e-05, "loss": 0.3687, "step": 6083, "task_loss": 1.0659382343292236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21508273482322693, "epoch": 5.14, "learning_rate": 1.3265306122448982e-05, "loss": 0.424, "step": 6084, "task_loss": 0.5317401885986328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1602592170238495, "epoch": 5.14, "learning_rate": 1.325926820432315e-05, "loss": 0.4005, "step": 6085, "task_loss": 0.6873450875282288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27094176411628723, "epoch": 5.14, "learning_rate": 1.325323028619732e-05, "loss": 0.3938, "step": 6086, "task_loss": 0.5739681124687195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3037988543510437, "epoch": 5.15, "learning_rate": 1.324719236807149e-05, "loss": 0.3863, "step": 6087, "task_loss": 0.21553269028663635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4916405975818634, "epoch": 5.15, "learning_rate": 1.3241154449945659e-05, "loss": 0.3997, "step": 6088, "task_loss": 0.5573102831840515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3141964077949524, "epoch": 5.15, "learning_rate": 1.323511653181983e-05, "loss": 0.3098, "step": 6089, "task_loss": 0.45188969373703003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15531042218208313, "epoch": 5.15, "learning_rate": 1.3229078613693998e-05, "loss": 0.3241, "step": 6090, "task_loss": 0.17922566831111908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3181304931640625, "epoch": 5.15, "learning_rate": 1.3223040695568167e-05, "loss": 0.3015, "step": 6091, "task_loss": 0.31551221013069153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3489677309989929, "epoch": 5.15, "learning_rate": 1.321700277744234e-05, "loss": 0.3737, "step": 6092, "task_loss": 0.5121562480926514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4518738389015198, "epoch": 5.15, "learning_rate": 1.3210964859316508e-05, "loss": 0.4309, "step": 6093, "task_loss": 0.6892078518867493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22817203402519226, "epoch": 5.15, "learning_rate": 1.3204926941190679e-05, "loss": 0.3513, "step": 6094, "task_loss": 0.17308920621871948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33630916476249695, "epoch": 5.15, "learning_rate": 1.3198889023064848e-05, "loss": 0.3752, "step": 6095, "task_loss": 0.8592535257339478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3921385407447815, "epoch": 5.15, "learning_rate": 1.3192851104939017e-05, "loss": 0.3851, "step": 6096, "task_loss": 1.4345797300338745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3104472756385803, "epoch": 5.15, "learning_rate": 1.3186813186813187e-05, "loss": 0.3083, "step": 6097, "task_loss": 0.8679472804069519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35278838872909546, "epoch": 5.15, "learning_rate": 1.3180775268687356e-05, "loss": 0.4119, "step": 6098, "task_loss": 1.0160245895385742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38889026641845703, "epoch": 5.16, "learning_rate": 1.3174737350561528e-05, "loss": 0.4032, "step": 6099, "task_loss": 0.7300900816917419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15498784184455872, "epoch": 5.16, "learning_rate": 1.3168699432435697e-05, "loss": 0.3562, "step": 6100, "task_loss": 0.4119630455970764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4098846912384033, "epoch": 5.16, "learning_rate": 1.3162661514309866e-05, "loss": 0.4131, "step": 6101, "task_loss": 1.8720093965530396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3605012595653534, "epoch": 5.16, "learning_rate": 1.3156623596184037e-05, "loss": 0.3931, "step": 6102, "task_loss": 0.22692641615867615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7651031017303467, "epoch": 5.16, "learning_rate": 1.3150585678058205e-05, "loss": 0.3937, "step": 6103, "task_loss": 0.9579556584358215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24448394775390625, "epoch": 5.16, "learning_rate": 1.3144547759932378e-05, "loss": 0.3143, "step": 6104, "task_loss": 0.3216593265533447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.401980996131897, "epoch": 5.16, "learning_rate": 1.3138509841806545e-05, "loss": 0.2656, "step": 6105, "task_loss": 0.23756329715251923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37076064944267273, "epoch": 5.16, "learning_rate": 1.3132471923680714e-05, "loss": 0.306, "step": 6106, "task_loss": 0.293645977973938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2874169945716858, "epoch": 5.16, "learning_rate": 1.3126434005554886e-05, "loss": 0.4005, "step": 6107, "task_loss": 0.4826385974884033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40337425470352173, "epoch": 5.16, "learning_rate": 1.3120396087429055e-05, "loss": 0.4136, "step": 6108, "task_loss": 0.6802683472633362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27480536699295044, "epoch": 5.16, "learning_rate": 1.3114358169303225e-05, "loss": 0.373, "step": 6109, "task_loss": 1.278589129447937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4012201428413391, "epoch": 5.16, "learning_rate": 1.3108320251177394e-05, "loss": 0.3052, "step": 6110, "task_loss": 1.1304491758346558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.329190194606781, "epoch": 5.17, "learning_rate": 1.3102282333051563e-05, "loss": 0.3728, "step": 6111, "task_loss": 0.7190659046173096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4288591742515564, "epoch": 5.17, "learning_rate": 1.3096244414925734e-05, "loss": 0.5073, "step": 6112, "task_loss": 0.8812048435211182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2580195665359497, "epoch": 5.17, "learning_rate": 1.3090206496799903e-05, "loss": 0.3721, "step": 6113, "task_loss": 0.4768928587436676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3728943467140198, "epoch": 5.17, "learning_rate": 1.3084168578674075e-05, "loss": 0.3598, "step": 6114, "task_loss": 0.7358054518699646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23165424168109894, "epoch": 5.17, "learning_rate": 1.3078130660548244e-05, "loss": 0.3508, "step": 6115, "task_loss": 0.22919994592666626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3301008343696594, "epoch": 5.17, "learning_rate": 1.3072092742422413e-05, "loss": 0.2868, "step": 6116, "task_loss": 0.45791399478912354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24125374853610992, "epoch": 5.17, "learning_rate": 1.3066054824296583e-05, "loss": 0.2907, "step": 6117, "task_loss": 0.25976061820983887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3982039988040924, "epoch": 5.17, "learning_rate": 1.3060016906170752e-05, "loss": 0.3884, "step": 6118, "task_loss": 0.8354839086532593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.362647145986557, "epoch": 5.17, "learning_rate": 1.3053978988044924e-05, "loss": 0.3788, "step": 6119, "task_loss": 0.43696776032447815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2695312798023224, "epoch": 5.17, "learning_rate": 1.3047941069919092e-05, "loss": 0.2988, "step": 6120, "task_loss": 0.16806140542030334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41045987606048584, "epoch": 5.17, "learning_rate": 1.304190315179326e-05, "loss": 0.3843, "step": 6121, "task_loss": 1.050892949104309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.374784916639328, "epoch": 5.17, "learning_rate": 1.3035865233667433e-05, "loss": 0.4168, "step": 6122, "task_loss": 0.3756651282310486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34442004561424255, "epoch": 5.18, "learning_rate": 1.3029827315541602e-05, "loss": 0.3742, "step": 6123, "task_loss": 0.4979253113269806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24498307704925537, "epoch": 5.18, "learning_rate": 1.3023789397415772e-05, "loss": 0.3471, "step": 6124, "task_loss": 0.6566387414932251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3400336503982544, "epoch": 5.18, "learning_rate": 1.3017751479289941e-05, "loss": 0.2969, "step": 6125, "task_loss": 0.6532499194145203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7962508201599121, "epoch": 5.18, "learning_rate": 1.301171356116411e-05, "loss": 0.5181, "step": 6126, "task_loss": 0.9581146836280823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23443904519081116, "epoch": 5.18, "learning_rate": 1.3005675643038282e-05, "loss": 0.2743, "step": 6127, "task_loss": 0.10088001191616058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7834734916687012, "epoch": 5.18, "learning_rate": 1.299963772491245e-05, "loss": 0.4727, "step": 6128, "task_loss": 0.43826213479042053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4025552272796631, "epoch": 5.18, "learning_rate": 1.2993599806786622e-05, "loss": 0.4147, "step": 6129, "task_loss": 0.9220321774482727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35033851861953735, "epoch": 5.18, "learning_rate": 1.298756188866079e-05, "loss": 0.2884, "step": 6130, "task_loss": 0.5959760546684265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3039707839488983, "epoch": 5.18, "learning_rate": 1.298152397053496e-05, "loss": 0.3763, "step": 6131, "task_loss": 1.022685170173645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2831045687198639, "epoch": 5.18, "learning_rate": 1.297548605240913e-05, "loss": 0.4678, "step": 6132, "task_loss": 1.0012485980987549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25534483790397644, "epoch": 5.18, "learning_rate": 1.2969448134283299e-05, "loss": 0.3552, "step": 6133, "task_loss": 0.47118574380874634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4116463363170624, "epoch": 5.19, "learning_rate": 1.2963410216157471e-05, "loss": 0.4339, "step": 6134, "task_loss": 1.2735434770584106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42163026332855225, "epoch": 5.19, "learning_rate": 1.295737229803164e-05, "loss": 0.3343, "step": 6135, "task_loss": 0.3961406350135803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3167825937271118, "epoch": 5.19, "learning_rate": 1.2951334379905807e-05, "loss": 0.312, "step": 6136, "task_loss": 0.9158685207366943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2628577947616577, "epoch": 5.19, "learning_rate": 1.294529646177998e-05, "loss": 0.2573, "step": 6137, "task_loss": 0.8943985104560852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3221127986907959, "epoch": 5.19, "learning_rate": 1.2939258543654148e-05, "loss": 0.3792, "step": 6138, "task_loss": 0.5870867371559143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22237321734428406, "epoch": 5.19, "learning_rate": 1.2933220625528319e-05, "loss": 0.3373, "step": 6139, "task_loss": 0.23264789581298828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3464910089969635, "epoch": 5.19, "learning_rate": 1.2927182707402488e-05, "loss": 0.3002, "step": 6140, "task_loss": 0.13752098381519318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37275606393814087, "epoch": 5.19, "learning_rate": 1.2921144789276657e-05, "loss": 0.4058, "step": 6141, "task_loss": 0.7023574709892273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2724091410636902, "epoch": 5.19, "learning_rate": 1.2915106871150829e-05, "loss": 0.3805, "step": 6142, "task_loss": 0.3080630898475647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19537466764450073, "epoch": 5.19, "learning_rate": 1.2909068953024998e-05, "loss": 0.3857, "step": 6143, "task_loss": 0.03084198385477066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32123684883117676, "epoch": 5.19, "learning_rate": 1.2903031034899168e-05, "loss": 0.3603, "step": 6144, "task_loss": 0.8758782148361206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2913019061088562, "epoch": 5.19, "learning_rate": 1.2896993116773337e-05, "loss": 0.3692, "step": 6145, "task_loss": 0.6344912648200989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2206139713525772, "epoch": 5.2, "learning_rate": 1.2890955198647506e-05, "loss": 0.2869, "step": 6146, "task_loss": 0.4664731025695801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3147173821926117, "epoch": 5.2, "learning_rate": 1.2884917280521677e-05, "loss": 0.3458, "step": 6147, "task_loss": 0.37597644329071045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3244105577468872, "epoch": 5.2, "learning_rate": 1.2878879362395846e-05, "loss": 0.3274, "step": 6148, "task_loss": 0.4794621467590332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23785562813282013, "epoch": 5.2, "learning_rate": 1.2872841444270018e-05, "loss": 0.2495, "step": 6149, "task_loss": 0.4986094534397125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2445751279592514, "epoch": 5.2, "learning_rate": 1.2866803526144187e-05, "loss": 0.309, "step": 6150, "task_loss": 0.49800214171409607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24269261956214905, "epoch": 5.2, "learning_rate": 1.2860765608018356e-05, "loss": 0.245, "step": 6151, "task_loss": 0.1945325881242752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23869682848453522, "epoch": 5.2, "learning_rate": 1.2854727689892526e-05, "loss": 0.3315, "step": 6152, "task_loss": 0.25807827711105347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30895864963531494, "epoch": 5.2, "learning_rate": 1.2848689771766695e-05, "loss": 0.4236, "step": 6153, "task_loss": 0.9018588066101074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33351951837539673, "epoch": 5.2, "learning_rate": 1.2842651853640866e-05, "loss": 0.3955, "step": 6154, "task_loss": 0.571591317653656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4924083948135376, "epoch": 5.2, "learning_rate": 1.2836613935515034e-05, "loss": 0.3707, "step": 6155, "task_loss": 0.6678913235664368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3240179121494293, "epoch": 5.2, "learning_rate": 1.2830576017389203e-05, "loss": 0.4535, "step": 6156, "task_loss": 0.9701945781707764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3131393790245056, "epoch": 5.2, "learning_rate": 1.2824538099263376e-05, "loss": 0.3435, "step": 6157, "task_loss": 1.077248215675354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25958964228630066, "epoch": 5.21, "learning_rate": 1.2818500181137544e-05, "loss": 0.2798, "step": 6158, "task_loss": 1.0155028104782104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33711108565330505, "epoch": 5.21, "learning_rate": 1.2812462263011715e-05, "loss": 0.3376, "step": 6159, "task_loss": 0.6857681274414062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38741570711135864, "epoch": 5.21, "learning_rate": 1.2806424344885884e-05, "loss": 0.3504, "step": 6160, "task_loss": 0.08645164966583252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39062756299972534, "epoch": 5.21, "learning_rate": 1.2800386426760053e-05, "loss": 0.4032, "step": 6161, "task_loss": 0.45058849453926086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3199421763420105, "epoch": 5.21, "learning_rate": 1.2794348508634223e-05, "loss": 0.3579, "step": 6162, "task_loss": 1.3548699617385864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3827421963214874, "epoch": 5.21, "learning_rate": 1.2788310590508392e-05, "loss": 0.4003, "step": 6163, "task_loss": 0.8355470299720764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25682827830314636, "epoch": 5.21, "learning_rate": 1.2782272672382565e-05, "loss": 0.2879, "step": 6164, "task_loss": 0.3149838149547577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3835751414299011, "epoch": 5.21, "learning_rate": 1.2776234754256733e-05, "loss": 0.3427, "step": 6165, "task_loss": 0.7306942343711853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41317230463027954, "epoch": 5.21, "learning_rate": 1.2770196836130902e-05, "loss": 0.313, "step": 6166, "task_loss": 0.5252606272697449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3353908061981201, "epoch": 5.21, "learning_rate": 1.2764158918005073e-05, "loss": 0.5522, "step": 6167, "task_loss": 0.4571850299835205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29919910430908203, "epoch": 5.21, "learning_rate": 1.2758120999879242e-05, "loss": 0.4503, "step": 6168, "task_loss": 0.559490978717804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17729710042476654, "epoch": 5.21, "learning_rate": 1.2752083081753414e-05, "loss": 0.3044, "step": 6169, "task_loss": 0.6245282292366028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3691714406013489, "epoch": 5.22, "learning_rate": 1.2746045163627581e-05, "loss": 0.392, "step": 6170, "task_loss": 0.8358081579208374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3556380867958069, "epoch": 5.22, "learning_rate": 1.274000724550175e-05, "loss": 0.3019, "step": 6171, "task_loss": 1.2497812509536743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3252919316291809, "epoch": 5.22, "learning_rate": 1.2733969327375922e-05, "loss": 0.2719, "step": 6172, "task_loss": 0.22679415345191956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30526769161224365, "epoch": 5.22, "learning_rate": 1.2727931409250091e-05, "loss": 0.3582, "step": 6173, "task_loss": 0.3815959393978119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20581591129302979, "epoch": 5.22, "learning_rate": 1.2721893491124262e-05, "loss": 0.2709, "step": 6174, "task_loss": 0.5766567587852478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28438088297843933, "epoch": 5.22, "learning_rate": 1.271585557299843e-05, "loss": 0.3584, "step": 6175, "task_loss": 0.8621641993522644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22922179102897644, "epoch": 5.22, "learning_rate": 1.27098176548726e-05, "loss": 0.3677, "step": 6176, "task_loss": 1.1901612281799316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2520645558834076, "epoch": 5.22, "learning_rate": 1.270377973674677e-05, "loss": 0.2889, "step": 6177, "task_loss": 0.5251764059066772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42223060131073, "epoch": 5.22, "learning_rate": 1.2697741818620939e-05, "loss": 0.3249, "step": 6178, "task_loss": 0.5730174779891968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40528059005737305, "epoch": 5.22, "learning_rate": 1.2691703900495111e-05, "loss": 0.3887, "step": 6179, "task_loss": 0.7383702993392944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33856773376464844, "epoch": 5.22, "learning_rate": 1.268566598236928e-05, "loss": 0.3267, "step": 6180, "task_loss": 0.12666042149066925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23978902399539948, "epoch": 5.22, "learning_rate": 1.2679628064243449e-05, "loss": 0.342, "step": 6181, "task_loss": 0.7003610134124756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3102791905403137, "epoch": 5.23, "learning_rate": 1.267359014611762e-05, "loss": 0.3423, "step": 6182, "task_loss": 0.27375224232673645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33721059560775757, "epoch": 5.23, "learning_rate": 1.2667552227991788e-05, "loss": 0.3761, "step": 6183, "task_loss": 1.0899288654327393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.304368257522583, "epoch": 5.23, "learning_rate": 1.266151430986596e-05, "loss": 0.3312, "step": 6184, "task_loss": 0.5498567819595337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2091372311115265, "epoch": 5.23, "learning_rate": 1.2655476391740128e-05, "loss": 0.35, "step": 6185, "task_loss": 0.8747664093971252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2940012812614441, "epoch": 5.23, "learning_rate": 1.2649438473614297e-05, "loss": 0.3603, "step": 6186, "task_loss": 0.5039197206497192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3247738480567932, "epoch": 5.23, "learning_rate": 1.2643400555488469e-05, "loss": 0.3542, "step": 6187, "task_loss": 0.6763137578964233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2996874153614044, "epoch": 5.23, "learning_rate": 1.2637362637362638e-05, "loss": 0.3343, "step": 6188, "task_loss": 0.10214655846357346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24176523089408875, "epoch": 5.23, "learning_rate": 1.2631324719236808e-05, "loss": 0.3518, "step": 6189, "task_loss": 0.06865212321281433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24526596069335938, "epoch": 5.23, "learning_rate": 1.2625286801110977e-05, "loss": 0.3348, "step": 6190, "task_loss": 0.16902893781661987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.288657546043396, "epoch": 5.23, "learning_rate": 1.2619248882985146e-05, "loss": 0.2925, "step": 6191, "task_loss": 0.6051961183547974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4819086790084839, "epoch": 5.23, "learning_rate": 1.2613210964859318e-05, "loss": 0.4747, "step": 6192, "task_loss": 0.511467456817627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3642745018005371, "epoch": 5.23, "learning_rate": 1.2607173046733486e-05, "loss": 0.3714, "step": 6193, "task_loss": 0.6489524841308594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2621919512748718, "epoch": 5.24, "learning_rate": 1.2601135128607658e-05, "loss": 0.3718, "step": 6194, "task_loss": 0.5429467558860779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3021751642227173, "epoch": 5.24, "learning_rate": 1.2595097210481827e-05, "loss": 0.4115, "step": 6195, "task_loss": 0.16272735595703125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3056473433971405, "epoch": 5.24, "learning_rate": 1.2589059292355996e-05, "loss": 0.2401, "step": 6196, "task_loss": 0.4716377854347229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3153030574321747, "epoch": 5.24, "learning_rate": 1.2583021374230166e-05, "loss": 0.3576, "step": 6197, "task_loss": 0.6170608997344971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.172945037484169, "epoch": 5.24, "learning_rate": 1.2576983456104335e-05, "loss": 0.2462, "step": 6198, "task_loss": 0.03066607192158699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28508460521698, "epoch": 5.24, "learning_rate": 1.2570945537978507e-05, "loss": 0.2893, "step": 6199, "task_loss": 0.5536911487579346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3145662546157837, "epoch": 5.24, "learning_rate": 1.2564907619852676e-05, "loss": 0.3622, "step": 6200, "task_loss": 0.32376828789711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35830169916152954, "epoch": 5.24, "learning_rate": 1.2558869701726843e-05, "loss": 0.3457, "step": 6201, "task_loss": 0.024026796221733093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3191727101802826, "epoch": 5.24, "learning_rate": 1.2552831783601016e-05, "loss": 0.4014, "step": 6202, "task_loss": 0.5174000263214111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22723479568958282, "epoch": 5.24, "learning_rate": 1.2546793865475185e-05, "loss": 0.3265, "step": 6203, "task_loss": 0.1360616683959961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.12899303436279297, "epoch": 5.24, "learning_rate": 1.2540755947349355e-05, "loss": 0.4098, "step": 6204, "task_loss": 0.07005707174539566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38357898592948914, "epoch": 5.24, "learning_rate": 1.2534718029223524e-05, "loss": 0.4205, "step": 6205, "task_loss": 0.5740840435028076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3298664689064026, "epoch": 5.25, "learning_rate": 1.2528680111097693e-05, "loss": 0.3223, "step": 6206, "task_loss": 0.6729705929756165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37544015049934387, "epoch": 5.25, "learning_rate": 1.2522642192971865e-05, "loss": 0.3951, "step": 6207, "task_loss": 0.9335683584213257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22019603848457336, "epoch": 5.25, "learning_rate": 1.2516604274846034e-05, "loss": 0.2821, "step": 6208, "task_loss": 0.06558017432689667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38010916113853455, "epoch": 5.25, "learning_rate": 1.2510566356720205e-05, "loss": 0.3486, "step": 6209, "task_loss": 0.44799891114234924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4333375096321106, "epoch": 5.25, "learning_rate": 1.2504528438594374e-05, "loss": 0.337, "step": 6210, "task_loss": 0.7560638785362244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36069315671920776, "epoch": 5.25, "learning_rate": 1.2498490520468544e-05, "loss": 0.4076, "step": 6211, "task_loss": 0.7118860483169556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33114612102508545, "epoch": 5.25, "learning_rate": 1.2492452602342713e-05, "loss": 0.3037, "step": 6212, "task_loss": 0.25313568115234375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17261265218257904, "epoch": 5.25, "learning_rate": 1.2486414684216882e-05, "loss": 0.3066, "step": 6213, "task_loss": 0.14460411667823792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5320208072662354, "epoch": 5.25, "learning_rate": 1.2480376766091052e-05, "loss": 0.364, "step": 6214, "task_loss": 0.9836165904998779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.332232803106308, "epoch": 5.25, "learning_rate": 1.2474338847965223e-05, "loss": 0.3613, "step": 6215, "task_loss": 1.7846333980560303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5287883877754211, "epoch": 5.25, "learning_rate": 1.2468300929839392e-05, "loss": 0.4037, "step": 6216, "task_loss": 0.9834396243095398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20216219127178192, "epoch": 5.26, "learning_rate": 1.246226301171356e-05, "loss": 0.3146, "step": 6217, "task_loss": 0.7857782244682312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.11256629228591919, "epoch": 5.26, "learning_rate": 1.2456225093587731e-05, "loss": 0.3668, "step": 6218, "task_loss": 0.6520318984985352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23906856775283813, "epoch": 5.26, "learning_rate": 1.2450187175461902e-05, "loss": 0.2645, "step": 6219, "task_loss": 0.10917194187641144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3960825204849243, "epoch": 5.26, "learning_rate": 1.244414925733607e-05, "loss": 0.4636, "step": 6220, "task_loss": 1.1172658205032349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47023090720176697, "epoch": 5.26, "learning_rate": 1.2438111339210241e-05, "loss": 0.324, "step": 6221, "task_loss": 0.11548452079296112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2478490024805069, "epoch": 5.26, "learning_rate": 1.243207342108441e-05, "loss": 0.365, "step": 6222, "task_loss": 0.29491618275642395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.341747522354126, "epoch": 5.26, "learning_rate": 1.242603550295858e-05, "loss": 0.3501, "step": 6223, "task_loss": 0.07914337515830994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3009715676307678, "epoch": 5.26, "learning_rate": 1.241999758483275e-05, "loss": 0.3248, "step": 6224, "task_loss": 0.20474757254123688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35846197605133057, "epoch": 5.26, "learning_rate": 1.241395966670692e-05, "loss": 0.4561, "step": 6225, "task_loss": 1.796678900718689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3405478000640869, "epoch": 5.26, "learning_rate": 1.240792174858109e-05, "loss": 0.2751, "step": 6226, "task_loss": 0.3981640040874481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2257852852344513, "epoch": 5.26, "learning_rate": 1.240188383045526e-05, "loss": 0.282, "step": 6227, "task_loss": 0.6181155443191528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4802110493183136, "epoch": 5.26, "learning_rate": 1.2395845912329429e-05, "loss": 0.5305, "step": 6228, "task_loss": 1.53773033618927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3122127056121826, "epoch": 5.27, "learning_rate": 1.2389807994203599e-05, "loss": 0.3895, "step": 6229, "task_loss": 0.6593275666236877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42442190647125244, "epoch": 5.27, "learning_rate": 1.238377007607777e-05, "loss": 0.3698, "step": 6230, "task_loss": 0.5137786865234375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1526576727628708, "epoch": 5.27, "learning_rate": 1.2377732157951939e-05, "loss": 0.3605, "step": 6231, "task_loss": 0.27580684423446655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.13213349878787994, "epoch": 5.27, "learning_rate": 1.2371694239826107e-05, "loss": 0.2818, "step": 6232, "task_loss": 0.007626876700669527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.362631618976593, "epoch": 5.27, "learning_rate": 1.2365656321700278e-05, "loss": 0.4389, "step": 6233, "task_loss": 1.4358383417129517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3468567728996277, "epoch": 5.27, "learning_rate": 1.2359618403574449e-05, "loss": 0.3937, "step": 6234, "task_loss": 0.7529078722000122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24047896265983582, "epoch": 5.27, "learning_rate": 1.2353580485448617e-05, "loss": 0.3178, "step": 6235, "task_loss": 0.3229711651802063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28388702869415283, "epoch": 5.27, "learning_rate": 1.2347542567322788e-05, "loss": 0.3025, "step": 6236, "task_loss": 0.18398110568523407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2693178057670593, "epoch": 5.27, "learning_rate": 1.2341504649196957e-05, "loss": 0.3602, "step": 6237, "task_loss": 0.23369964957237244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28788554668426514, "epoch": 5.27, "learning_rate": 1.2335466731071127e-05, "loss": 0.3637, "step": 6238, "task_loss": 0.7101067900657654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4533052444458008, "epoch": 5.27, "learning_rate": 1.2329428812945296e-05, "loss": 0.2437, "step": 6239, "task_loss": 0.25191420316696167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6809208989143372, "epoch": 5.27, "learning_rate": 1.2323390894819467e-05, "loss": 0.4106, "step": 6240, "task_loss": 0.18395569920539856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42141878604888916, "epoch": 5.28, "learning_rate": 1.2317352976693638e-05, "loss": 0.3829, "step": 6241, "task_loss": 1.8422088623046875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3621111810207367, "epoch": 5.28, "learning_rate": 1.2311315058567806e-05, "loss": 0.3873, "step": 6242, "task_loss": 0.42420631647109985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2229287028312683, "epoch": 5.28, "learning_rate": 1.2305277140441975e-05, "loss": 0.2886, "step": 6243, "task_loss": 1.1286773681640625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14864695072174072, "epoch": 5.28, "learning_rate": 1.2299239222316146e-05, "loss": 0.2893, "step": 6244, "task_loss": 0.49230262637138367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44227105379104614, "epoch": 5.28, "learning_rate": 1.2293201304190316e-05, "loss": 0.3493, "step": 6245, "task_loss": 0.756534218788147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30127546191215515, "epoch": 5.28, "learning_rate": 1.2287163386064485e-05, "loss": 0.2909, "step": 6246, "task_loss": 0.41279852390289307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3151090145111084, "epoch": 5.28, "learning_rate": 1.2281125467938654e-05, "loss": 0.3432, "step": 6247, "task_loss": 0.5278312563896179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.257122278213501, "epoch": 5.28, "learning_rate": 1.2275087549812825e-05, "loss": 0.3361, "step": 6248, "task_loss": 0.31265756487846375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37331220507621765, "epoch": 5.28, "learning_rate": 1.2269049631686995e-05, "loss": 0.3065, "step": 6249, "task_loss": 0.9086564183235168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.8060978651046753, "epoch": 5.28, "learning_rate": 1.2263011713561164e-05, "loss": 0.3816, "step": 6250, "task_loss": 0.9480526447296143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.11931002885103226, "epoch": 5.28, "learning_rate": 1.2256973795435335e-05, "loss": 0.3743, "step": 6251, "task_loss": 1.4030017852783203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24361653625965118, "epoch": 5.28, "learning_rate": 1.2250935877309504e-05, "loss": 0.3074, "step": 6252, "task_loss": 0.42411693930625916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19679933786392212, "epoch": 5.29, "learning_rate": 1.2244897959183674e-05, "loss": 0.297, "step": 6253, "task_loss": 0.3685755729675293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33711156249046326, "epoch": 5.29, "learning_rate": 1.2238860041057843e-05, "loss": 0.3549, "step": 6254, "task_loss": 0.39033156633377075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42940065264701843, "epoch": 5.29, "learning_rate": 1.2232822122932014e-05, "loss": 0.3607, "step": 6255, "task_loss": 0.5829775929450989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4093332588672638, "epoch": 5.29, "learning_rate": 1.2226784204806184e-05, "loss": 0.3752, "step": 6256, "task_loss": 0.6832465529441833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44392284750938416, "epoch": 5.29, "learning_rate": 1.2220746286680353e-05, "loss": 0.3193, "step": 6257, "task_loss": 0.4856323301792145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3675299882888794, "epoch": 5.29, "learning_rate": 1.2214708368554522e-05, "loss": 0.3749, "step": 6258, "task_loss": 0.2518847584724426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28155189752578735, "epoch": 5.29, "learning_rate": 1.2208670450428693e-05, "loss": 0.3063, "step": 6259, "task_loss": 0.14834855496883392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24474994838237762, "epoch": 5.29, "learning_rate": 1.2202632532302863e-05, "loss": 0.2898, "step": 6260, "task_loss": 1.0954759120941162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.329365074634552, "epoch": 5.29, "learning_rate": 1.2196594614177034e-05, "loss": 0.2941, "step": 6261, "task_loss": 0.6300785541534424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4638823866844177, "epoch": 5.29, "learning_rate": 1.2190556696051201e-05, "loss": 0.3236, "step": 6262, "task_loss": 0.3575841784477234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21790829300880432, "epoch": 5.29, "learning_rate": 1.2184518777925371e-05, "loss": 0.387, "step": 6263, "task_loss": 0.03688498213887215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32300394773483276, "epoch": 5.29, "learning_rate": 1.2178480859799542e-05, "loss": 0.2986, "step": 6264, "task_loss": 0.8989654779434204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4282037615776062, "epoch": 5.3, "learning_rate": 1.2172442941673713e-05, "loss": 0.4113, "step": 6265, "task_loss": 0.46589550375938416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24786126613616943, "epoch": 5.3, "learning_rate": 1.2166405023547881e-05, "loss": 0.2593, "step": 6266, "task_loss": 0.6012431383132935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16317735612392426, "epoch": 5.3, "learning_rate": 1.216036710542205e-05, "loss": 0.2527, "step": 6267, "task_loss": 0.21284738183021545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.235362708568573, "epoch": 5.3, "learning_rate": 1.2154329187296221e-05, "loss": 0.2964, "step": 6268, "task_loss": 0.619866669178009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28894931077957153, "epoch": 5.3, "learning_rate": 1.2148291269170391e-05, "loss": 0.3034, "step": 6269, "task_loss": 0.4437788724899292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26737791299819946, "epoch": 5.3, "learning_rate": 1.214225335104456e-05, "loss": 0.3922, "step": 6270, "task_loss": 0.10015563666820526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24855734407901764, "epoch": 5.3, "learning_rate": 1.2136215432918731e-05, "loss": 0.3044, "step": 6271, "task_loss": 0.3466190695762634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3702094852924347, "epoch": 5.3, "learning_rate": 1.21301775147929e-05, "loss": 0.4255, "step": 6272, "task_loss": 0.8145326972007751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1744307428598404, "epoch": 5.3, "learning_rate": 1.212413959666707e-05, "loss": 0.4394, "step": 6273, "task_loss": 0.700520396232605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1946127861738205, "epoch": 5.3, "learning_rate": 1.211810167854124e-05, "loss": 0.2663, "step": 6274, "task_loss": 0.1640913188457489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19971001148223877, "epoch": 5.3, "learning_rate": 1.211206376041541e-05, "loss": 0.2765, "step": 6275, "task_loss": 0.0745186135172844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4417300224304199, "epoch": 5.3, "learning_rate": 1.210602584228958e-05, "loss": 0.3421, "step": 6276, "task_loss": 0.8180645108222961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4597139358520508, "epoch": 5.31, "learning_rate": 1.209998792416375e-05, "loss": 0.3635, "step": 6277, "task_loss": 1.4477885961532593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40509840846061707, "epoch": 5.31, "learning_rate": 1.2093950006037918e-05, "loss": 0.4231, "step": 6278, "task_loss": 0.19797205924987793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2730720043182373, "epoch": 5.31, "learning_rate": 1.2087912087912089e-05, "loss": 0.2955, "step": 6279, "task_loss": 0.15189561247825623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5157040357589722, "epoch": 5.31, "learning_rate": 1.208187416978626e-05, "loss": 0.4864, "step": 6280, "task_loss": 1.1942907571792603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1536349654197693, "epoch": 5.31, "learning_rate": 1.2075836251660428e-05, "loss": 0.2803, "step": 6281, "task_loss": 0.4165586233139038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24527767300605774, "epoch": 5.31, "learning_rate": 1.2069798333534597e-05, "loss": 0.3312, "step": 6282, "task_loss": 0.441110223531723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28915664553642273, "epoch": 5.31, "learning_rate": 1.2063760415408768e-05, "loss": 0.2834, "step": 6283, "task_loss": 0.797798216342926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2825828790664673, "epoch": 5.31, "learning_rate": 1.2057722497282938e-05, "loss": 0.3663, "step": 6284, "task_loss": 1.0362539291381836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44979166984558105, "epoch": 5.31, "learning_rate": 1.2051684579157107e-05, "loss": 0.4127, "step": 6285, "task_loss": 0.8441143035888672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3118323087692261, "epoch": 5.31, "learning_rate": 1.2045646661031278e-05, "loss": 0.4232, "step": 6286, "task_loss": 0.2029878944158554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27125027775764465, "epoch": 5.31, "learning_rate": 1.2039608742905447e-05, "loss": 0.2623, "step": 6287, "task_loss": 0.18991900980472565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3351503610610962, "epoch": 5.32, "learning_rate": 1.2033570824779617e-05, "loss": 0.3186, "step": 6288, "task_loss": 0.6485171318054199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1999504268169403, "epoch": 5.32, "learning_rate": 1.2027532906653786e-05, "loss": 0.2994, "step": 6289, "task_loss": 0.2166719287633896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43581557273864746, "epoch": 5.32, "learning_rate": 1.2021494988527957e-05, "loss": 0.3975, "step": 6290, "task_loss": 0.38639700412750244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3912535309791565, "epoch": 5.32, "learning_rate": 1.2015457070402127e-05, "loss": 0.3327, "step": 6291, "task_loss": 0.6028567552566528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3146582543849945, "epoch": 5.32, "learning_rate": 1.2009419152276296e-05, "loss": 0.3661, "step": 6292, "task_loss": 0.5521934032440186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42800208926200867, "epoch": 5.32, "learning_rate": 1.2003381234150465e-05, "loss": 0.2273, "step": 6293, "task_loss": 0.8956670761108398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5516294240951538, "epoch": 5.32, "learning_rate": 1.1997343316024635e-05, "loss": 0.376, "step": 6294, "task_loss": 0.4495631456375122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40593141317367554, "epoch": 5.32, "learning_rate": 1.1991305397898806e-05, "loss": 0.3972, "step": 6295, "task_loss": 0.6054657697677612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33347266912460327, "epoch": 5.32, "learning_rate": 1.1985267479772975e-05, "loss": 0.3245, "step": 6296, "task_loss": 0.7480327486991882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42218226194381714, "epoch": 5.32, "learning_rate": 1.1979229561647144e-05, "loss": 0.3584, "step": 6297, "task_loss": 1.0350046157836914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18644389510154724, "epoch": 5.32, "learning_rate": 1.1973191643521314e-05, "loss": 0.2938, "step": 6298, "task_loss": 1.0324218273162842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49501922726631165, "epoch": 5.32, "learning_rate": 1.1967153725395485e-05, "loss": 0.3737, "step": 6299, "task_loss": 0.7755611538887024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31086671352386475, "epoch": 5.33, "learning_rate": 1.1961115807269654e-05, "loss": 0.3258, "step": 6300, "task_loss": 0.7166301608085632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.593705415725708, "epoch": 5.33, "learning_rate": 1.1955077889143823e-05, "loss": 0.4504, "step": 6301, "task_loss": 1.136077880859375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3945770263671875, "epoch": 5.33, "learning_rate": 1.1949039971017993e-05, "loss": 0.3887, "step": 6302, "task_loss": 0.6139612793922424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3017728328704834, "epoch": 5.33, "learning_rate": 1.1943002052892164e-05, "loss": 0.2925, "step": 6303, "task_loss": 0.9991453886032104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19045156240463257, "epoch": 5.33, "learning_rate": 1.1936964134766333e-05, "loss": 0.3074, "step": 6304, "task_loss": 1.3109303712844849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4657083749771118, "epoch": 5.33, "learning_rate": 1.1930926216640503e-05, "loss": 0.4108, "step": 6305, "task_loss": 0.18656061589717865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2524222433567047, "epoch": 5.33, "learning_rate": 1.1924888298514672e-05, "loss": 0.3766, "step": 6306, "task_loss": 0.36427852511405945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17602214217185974, "epoch": 5.33, "learning_rate": 1.1918850380388843e-05, "loss": 0.4032, "step": 6307, "task_loss": 0.414278507232666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5255146622657776, "epoch": 5.33, "learning_rate": 1.1912812462263012e-05, "loss": 0.5261, "step": 6308, "task_loss": 0.5358645915985107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40465956926345825, "epoch": 5.33, "learning_rate": 1.1906774544137182e-05, "loss": 0.3326, "step": 6309, "task_loss": 1.0865763425827026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2556668519973755, "epoch": 5.33, "learning_rate": 1.1900736626011353e-05, "loss": 0.2507, "step": 6310, "task_loss": 0.038273368030786514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28134390711784363, "epoch": 5.33, "learning_rate": 1.1894698707885522e-05, "loss": 0.3193, "step": 6311, "task_loss": 0.24123382568359375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2736983299255371, "epoch": 5.34, "learning_rate": 1.188866078975969e-05, "loss": 0.2766, "step": 6312, "task_loss": 0.35634517669677734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6002137660980225, "epoch": 5.34, "learning_rate": 1.1882622871633861e-05, "loss": 0.3823, "step": 6313, "task_loss": 0.9333024621009827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21932122111320496, "epoch": 5.34, "learning_rate": 1.1876584953508032e-05, "loss": 0.3007, "step": 6314, "task_loss": 0.18886108696460724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1932658702135086, "epoch": 5.34, "learning_rate": 1.18705470353822e-05, "loss": 0.3345, "step": 6315, "task_loss": 0.6704120635986328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20734122395515442, "epoch": 5.34, "learning_rate": 1.186450911725637e-05, "loss": 0.2345, "step": 6316, "task_loss": 0.23485705256462097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37533900141716003, "epoch": 5.34, "learning_rate": 1.185847119913054e-05, "loss": 0.3713, "step": 6317, "task_loss": 0.7573323249816895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2483510673046112, "epoch": 5.34, "learning_rate": 1.185243328100471e-05, "loss": 0.2538, "step": 6318, "task_loss": 0.0718422383069992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7714090347290039, "epoch": 5.34, "learning_rate": 1.184639536287888e-05, "loss": 0.4804, "step": 6319, "task_loss": 1.1833066940307617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32466182112693787, "epoch": 5.34, "learning_rate": 1.184035744475305e-05, "loss": 0.3404, "step": 6320, "task_loss": 1.1852580308914185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3524268865585327, "epoch": 5.34, "learning_rate": 1.1834319526627219e-05, "loss": 0.3548, "step": 6321, "task_loss": 1.0294883251190186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31361204385757446, "epoch": 5.34, "learning_rate": 1.182828160850139e-05, "loss": 0.3313, "step": 6322, "task_loss": 1.1710971593856812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1630479395389557, "epoch": 5.34, "learning_rate": 1.1822243690375558e-05, "loss": 0.2964, "step": 6323, "task_loss": 0.2180536836385727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6554901599884033, "epoch": 5.35, "learning_rate": 1.1816205772249729e-05, "loss": 0.3051, "step": 6324, "task_loss": 0.4468268156051636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28422144055366516, "epoch": 5.35, "learning_rate": 1.18101678541239e-05, "loss": 0.3834, "step": 6325, "task_loss": 0.6182525157928467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3679421842098236, "epoch": 5.35, "learning_rate": 1.1804129935998068e-05, "loss": 0.3848, "step": 6326, "task_loss": 0.18888860940933228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4132999777793884, "epoch": 5.35, "learning_rate": 1.1798092017872237e-05, "loss": 0.3381, "step": 6327, "task_loss": 0.9117360711097717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4080709218978882, "epoch": 5.35, "learning_rate": 1.1792054099746408e-05, "loss": 0.3871, "step": 6328, "task_loss": 0.17206139862537384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46375614404678345, "epoch": 5.35, "learning_rate": 1.1786016181620578e-05, "loss": 0.4155, "step": 6329, "task_loss": 0.3504164218902588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2920553684234619, "epoch": 5.35, "learning_rate": 1.1779978263494749e-05, "loss": 0.3083, "step": 6330, "task_loss": 0.5817282795906067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3022729754447937, "epoch": 5.35, "learning_rate": 1.1773940345368916e-05, "loss": 0.4105, "step": 6331, "task_loss": 0.7006362676620483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24604696035385132, "epoch": 5.35, "learning_rate": 1.1767902427243087e-05, "loss": 0.2728, "step": 6332, "task_loss": 0.7288413047790527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2253427654504776, "epoch": 5.35, "learning_rate": 1.1761864509117257e-05, "loss": 0.27, "step": 6333, "task_loss": 0.2924199402332306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2735265791416168, "epoch": 5.35, "learning_rate": 1.1755826590991428e-05, "loss": 0.3448, "step": 6334, "task_loss": 0.22853896021842957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4498865604400635, "epoch": 5.35, "learning_rate": 1.1749788672865597e-05, "loss": 0.3681, "step": 6335, "task_loss": 0.45202597975730896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2804650664329529, "epoch": 5.36, "learning_rate": 1.1743750754739766e-05, "loss": 0.3108, "step": 6336, "task_loss": 0.13311727344989777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5018231272697449, "epoch": 5.36, "learning_rate": 1.1737712836613936e-05, "loss": 0.4435, "step": 6337, "task_loss": 1.4413191080093384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3525571823120117, "epoch": 5.36, "learning_rate": 1.1731674918488107e-05, "loss": 0.3712, "step": 6338, "task_loss": 0.1723225861787796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37212398648262024, "epoch": 5.36, "learning_rate": 1.1725637000362276e-05, "loss": 0.3686, "step": 6339, "task_loss": 0.5648358464241028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20454512536525726, "epoch": 5.36, "learning_rate": 1.1719599082236446e-05, "loss": 0.3331, "step": 6340, "task_loss": 0.6230940222740173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47362202405929565, "epoch": 5.36, "learning_rate": 1.1713561164110615e-05, "loss": 0.3338, "step": 6341, "task_loss": 0.991213858127594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39058977365493774, "epoch": 5.36, "learning_rate": 1.1707523245984786e-05, "loss": 0.5152, "step": 6342, "task_loss": 0.7793782949447632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1961914300918579, "epoch": 5.36, "learning_rate": 1.1701485327858954e-05, "loss": 0.2892, "step": 6343, "task_loss": 0.4959262013435364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3209036886692047, "epoch": 5.36, "learning_rate": 1.1695447409733125e-05, "loss": 0.3821, "step": 6344, "task_loss": 0.6691746711730957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31303274631500244, "epoch": 5.36, "learning_rate": 1.1689409491607296e-05, "loss": 0.3738, "step": 6345, "task_loss": 0.47523629665374756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23777538537979126, "epoch": 5.36, "learning_rate": 1.1683371573481463e-05, "loss": 0.2305, "step": 6346, "task_loss": 0.30125951766967773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.319135844707489, "epoch": 5.36, "learning_rate": 1.1677333655355633e-05, "loss": 0.363, "step": 6347, "task_loss": 0.9870565533638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19521431624889374, "epoch": 5.37, "learning_rate": 1.1671295737229804e-05, "loss": 0.3109, "step": 6348, "task_loss": 0.567024827003479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23089802265167236, "epoch": 5.37, "learning_rate": 1.1665257819103974e-05, "loss": 0.2897, "step": 6349, "task_loss": 0.2796730399131775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46716445684432983, "epoch": 5.37, "learning_rate": 1.1659219900978143e-05, "loss": 0.4387, "step": 6350, "task_loss": 1.4989261627197266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2217223197221756, "epoch": 5.37, "learning_rate": 1.1653181982852312e-05, "loss": 0.2681, "step": 6351, "task_loss": 0.480746865272522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3414226472377777, "epoch": 5.37, "learning_rate": 1.1647144064726483e-05, "loss": 0.2858, "step": 6352, "task_loss": 0.22240887582302094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.372541218996048, "epoch": 5.37, "learning_rate": 1.1641106146600653e-05, "loss": 0.3115, "step": 6353, "task_loss": 1.6872429847717285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3380688428878784, "epoch": 5.37, "learning_rate": 1.1635068228474822e-05, "loss": 0.3792, "step": 6354, "task_loss": 0.961943507194519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38904497027397156, "epoch": 5.37, "learning_rate": 1.1629030310348993e-05, "loss": 0.4344, "step": 6355, "task_loss": 0.38759711384773254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.339015394449234, "epoch": 5.37, "learning_rate": 1.1622992392223162e-05, "loss": 0.3435, "step": 6356, "task_loss": 0.5462979674339294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44513943791389465, "epoch": 5.37, "learning_rate": 1.1616954474097332e-05, "loss": 0.3381, "step": 6357, "task_loss": 0.22143439948558807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17341797053813934, "epoch": 5.37, "learning_rate": 1.1610916555971501e-05, "loss": 0.2455, "step": 6358, "task_loss": 0.3889668881893158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2780899107456207, "epoch": 5.38, "learning_rate": 1.1604878637845672e-05, "loss": 0.3839, "step": 6359, "task_loss": 0.6513873338699341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30718332529067993, "epoch": 5.38, "learning_rate": 1.1598840719719842e-05, "loss": 0.3117, "step": 6360, "task_loss": 0.8444777727127075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4501781761646271, "epoch": 5.38, "learning_rate": 1.1592802801594011e-05, "loss": 0.3444, "step": 6361, "task_loss": 0.3925710618495941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23263026773929596, "epoch": 5.38, "learning_rate": 1.158676488346818e-05, "loss": 0.3163, "step": 6362, "task_loss": 0.42162108421325684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2251611351966858, "epoch": 5.38, "learning_rate": 1.158072696534235e-05, "loss": 0.3714, "step": 6363, "task_loss": 0.4196508526802063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28076377511024475, "epoch": 5.38, "learning_rate": 1.1574689047216521e-05, "loss": 0.3177, "step": 6364, "task_loss": 0.055793214589357376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18636472523212433, "epoch": 5.38, "learning_rate": 1.156865112909069e-05, "loss": 0.3291, "step": 6365, "task_loss": 0.1168462261557579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28110790252685547, "epoch": 5.38, "learning_rate": 1.1562613210964859e-05, "loss": 0.3759, "step": 6366, "task_loss": 0.22040513157844543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3729907274246216, "epoch": 5.38, "learning_rate": 1.155657529283903e-05, "loss": 0.3848, "step": 6367, "task_loss": 0.6905767321586609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2804126739501953, "epoch": 5.38, "learning_rate": 1.15505373747132e-05, "loss": 0.3439, "step": 6368, "task_loss": 0.6111408472061157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3105476498603821, "epoch": 5.38, "learning_rate": 1.1544499456587369e-05, "loss": 0.343, "step": 6369, "task_loss": 0.1049937903881073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2744455337524414, "epoch": 5.38, "learning_rate": 1.153846153846154e-05, "loss": 0.3473, "step": 6370, "task_loss": 0.48202091455459595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29008352756500244, "epoch": 5.39, "learning_rate": 1.1532423620335708e-05, "loss": 0.3597, "step": 6371, "task_loss": 0.9537291526794434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.253133624792099, "epoch": 5.39, "learning_rate": 1.1526385702209879e-05, "loss": 0.3389, "step": 6372, "task_loss": 0.3294081687927246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3151618242263794, "epoch": 5.39, "learning_rate": 1.1520347784084048e-05, "loss": 0.3363, "step": 6373, "task_loss": 0.46274226903915405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41134804487228394, "epoch": 5.39, "learning_rate": 1.1514309865958218e-05, "loss": 0.4383, "step": 6374, "task_loss": 0.6998343467712402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2937488853931427, "epoch": 5.39, "learning_rate": 1.1508271947832389e-05, "loss": 0.2782, "step": 6375, "task_loss": 0.28005310893058777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2960377037525177, "epoch": 5.39, "learning_rate": 1.1502234029706558e-05, "loss": 0.3183, "step": 6376, "task_loss": 0.47502827644348145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2841653525829315, "epoch": 5.39, "learning_rate": 1.1496196111580727e-05, "loss": 0.2382, "step": 6377, "task_loss": 0.4051593840122223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3819178342819214, "epoch": 5.39, "learning_rate": 1.1490158193454897e-05, "loss": 0.3523, "step": 6378, "task_loss": 0.22672003507614136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4021572172641754, "epoch": 5.39, "learning_rate": 1.1484120275329068e-05, "loss": 0.3202, "step": 6379, "task_loss": 0.5988771319389343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3904101848602295, "epoch": 5.39, "learning_rate": 1.1478082357203237e-05, "loss": 0.3308, "step": 6380, "task_loss": 1.032623291015625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.412116676568985, "epoch": 5.39, "learning_rate": 1.1472044439077406e-05, "loss": 0.3826, "step": 6381, "task_loss": 0.2904662787914276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3043474853038788, "epoch": 5.39, "learning_rate": 1.1466006520951576e-05, "loss": 0.2588, "step": 6382, "task_loss": 0.33787623047828674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26694267988204956, "epoch": 5.4, "learning_rate": 1.1459968602825747e-05, "loss": 0.3199, "step": 6383, "task_loss": 0.36456742882728577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3480791449546814, "epoch": 5.4, "learning_rate": 1.1453930684699916e-05, "loss": 0.3588, "step": 6384, "task_loss": 0.5897446274757385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5153487324714661, "epoch": 5.4, "learning_rate": 1.1447892766574086e-05, "loss": 0.4102, "step": 6385, "task_loss": 0.9948175549507141 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25641685724258423, "epoch": 5.4, "learning_rate": 1.1441854848448255e-05, "loss": 0.3446, "step": 6386, "task_loss": 0.053557138890028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2511101961135864, "epoch": 5.4, "learning_rate": 1.1435816930322426e-05, "loss": 0.3445, "step": 6387, "task_loss": 0.5376349687576294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5033015608787537, "epoch": 5.4, "learning_rate": 1.1429779012196595e-05, "loss": 0.4451, "step": 6388, "task_loss": 0.5550081729888916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3386191427707672, "epoch": 5.4, "learning_rate": 1.1423741094070765e-05, "loss": 0.3449, "step": 6389, "task_loss": 0.18982401490211487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40206027030944824, "epoch": 5.4, "learning_rate": 1.1417703175944934e-05, "loss": 0.3542, "step": 6390, "task_loss": 0.9095295071601868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4196421205997467, "epoch": 5.4, "learning_rate": 1.1411665257819105e-05, "loss": 0.3875, "step": 6391, "task_loss": 0.7438188791275024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2917773127555847, "epoch": 5.4, "learning_rate": 1.1405627339693273e-05, "loss": 0.3127, "step": 6392, "task_loss": 0.7127941250801086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2950597405433655, "epoch": 5.4, "learning_rate": 1.1399589421567444e-05, "loss": 0.3894, "step": 6393, "task_loss": 0.32253992557525635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4115889072418213, "epoch": 5.4, "learning_rate": 1.1393551503441615e-05, "loss": 0.4302, "step": 6394, "task_loss": 0.7019715905189514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4123161733150482, "epoch": 5.41, "learning_rate": 1.1387513585315783e-05, "loss": 0.3038, "step": 6395, "task_loss": 0.49220648407936096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34132224321365356, "epoch": 5.41, "learning_rate": 1.1381475667189952e-05, "loss": 0.3589, "step": 6396, "task_loss": 0.8668553233146667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3156663179397583, "epoch": 5.41, "learning_rate": 1.1375437749064123e-05, "loss": 0.3906, "step": 6397, "task_loss": 0.8557810187339783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2892687916755676, "epoch": 5.41, "learning_rate": 1.1369399830938294e-05, "loss": 0.2789, "step": 6398, "task_loss": 0.19311028718948364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31194233894348145, "epoch": 5.41, "learning_rate": 1.1363361912812464e-05, "loss": 0.3577, "step": 6399, "task_loss": 0.7628109455108643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3408154547214508, "epoch": 5.41, "learning_rate": 1.1357323994686631e-05, "loss": 0.3633, "step": 6400, "task_loss": 0.7227268218994141 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1861557513475418, "epoch": 5.41, "learning_rate": 1.1351286076560802e-05, "loss": 0.325, "step": 6401, "task_loss": 0.4219875931739807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2359963357448578, "epoch": 5.41, "learning_rate": 1.1345248158434972e-05, "loss": 0.4627, "step": 6402, "task_loss": 0.2423306107521057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3063146770000458, "epoch": 5.41, "learning_rate": 1.1339210240309143e-05, "loss": 0.3565, "step": 6403, "task_loss": 0.8020186424255371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32268252968788147, "epoch": 5.41, "learning_rate": 1.1333172322183312e-05, "loss": 0.3084, "step": 6404, "task_loss": 0.72157222032547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37873321771621704, "epoch": 5.41, "learning_rate": 1.132713440405748e-05, "loss": 0.4168, "step": 6405, "task_loss": 0.09997352957725525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24619954824447632, "epoch": 5.41, "learning_rate": 1.1321096485931651e-05, "loss": 0.3496, "step": 6406, "task_loss": 0.36713555455207825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4657585024833679, "epoch": 5.42, "learning_rate": 1.131505856780582e-05, "loss": 0.4327, "step": 6407, "task_loss": 0.5896143913269043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3560781180858612, "epoch": 5.42, "learning_rate": 1.130902064967999e-05, "loss": 0.2359, "step": 6408, "task_loss": 0.4423358738422394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1772746741771698, "epoch": 5.42, "learning_rate": 1.1302982731554161e-05, "loss": 0.2597, "step": 6409, "task_loss": 0.05670314282178879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2057124376296997, "epoch": 5.42, "learning_rate": 1.129694481342833e-05, "loss": 0.3262, "step": 6410, "task_loss": 1.1458468437194824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26755064725875854, "epoch": 5.42, "learning_rate": 1.1290906895302499e-05, "loss": 0.3066, "step": 6411, "task_loss": 0.3824005126953125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4289398789405823, "epoch": 5.42, "learning_rate": 1.128486897717667e-05, "loss": 0.4854, "step": 6412, "task_loss": 0.6911238431930542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31591129302978516, "epoch": 5.42, "learning_rate": 1.127883105905084e-05, "loss": 0.2602, "step": 6413, "task_loss": 0.7318911552429199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30210956931114197, "epoch": 5.42, "learning_rate": 1.127279314092501e-05, "loss": 0.3587, "step": 6414, "task_loss": 1.099611520767212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3000900149345398, "epoch": 5.42, "learning_rate": 1.1266755222799178e-05, "loss": 0.3405, "step": 6415, "task_loss": 0.2933809459209442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27972567081451416, "epoch": 5.42, "learning_rate": 1.1260717304673349e-05, "loss": 0.3776, "step": 6416, "task_loss": 2.0145983695983887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3423665761947632, "epoch": 5.42, "learning_rate": 1.1254679386547519e-05, "loss": 0.3858, "step": 6417, "task_loss": 0.6864776611328125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32236248254776, "epoch": 5.42, "learning_rate": 1.124864146842169e-05, "loss": 0.3768, "step": 6418, "task_loss": 0.4557684063911438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29667624831199646, "epoch": 5.43, "learning_rate": 1.1242603550295859e-05, "loss": 0.2484, "step": 6419, "task_loss": 0.5428160429000854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1539844274520874, "epoch": 5.43, "learning_rate": 1.1236565632170027e-05, "loss": 0.3467, "step": 6420, "task_loss": 0.05297285318374634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27041369676589966, "epoch": 5.43, "learning_rate": 1.1230527714044198e-05, "loss": 0.3829, "step": 6421, "task_loss": 0.9885236024856567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4683777987957001, "epoch": 5.43, "learning_rate": 1.1224489795918369e-05, "loss": 0.3505, "step": 6422, "task_loss": 0.18846796452999115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25327828526496887, "epoch": 5.43, "learning_rate": 1.1218451877792537e-05, "loss": 0.4245, "step": 6423, "task_loss": 0.822172224521637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3412300944328308, "epoch": 5.43, "learning_rate": 1.1212413959666708e-05, "loss": 0.3427, "step": 6424, "task_loss": 0.947530210018158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30233126878738403, "epoch": 5.43, "learning_rate": 1.1206376041540877e-05, "loss": 0.3337, "step": 6425, "task_loss": 0.23125073313713074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3245675265789032, "epoch": 5.43, "learning_rate": 1.1200338123415047e-05, "loss": 0.3213, "step": 6426, "task_loss": 0.5164424777030945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3619668781757355, "epoch": 5.43, "learning_rate": 1.1194300205289216e-05, "loss": 0.4353, "step": 6427, "task_loss": 0.4005638360977173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2983560562133789, "epoch": 5.43, "learning_rate": 1.1188262287163387e-05, "loss": 0.3028, "step": 6428, "task_loss": 0.2612011432647705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32533809542655945, "epoch": 5.43, "learning_rate": 1.1182224369037557e-05, "loss": 0.3722, "step": 6429, "task_loss": 0.26763850450515747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36616307497024536, "epoch": 5.44, "learning_rate": 1.1176186450911726e-05, "loss": 0.3597, "step": 6430, "task_loss": 0.46960383653640747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1833169162273407, "epoch": 5.44, "learning_rate": 1.1170148532785895e-05, "loss": 0.3413, "step": 6431, "task_loss": 0.09508258104324341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1912769377231598, "epoch": 5.44, "learning_rate": 1.1164110614660066e-05, "loss": 0.2717, "step": 6432, "task_loss": 0.2138340175151825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2556650638580322, "epoch": 5.44, "learning_rate": 1.1158072696534236e-05, "loss": 0.3139, "step": 6433, "task_loss": 0.46932312846183777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23199623823165894, "epoch": 5.44, "learning_rate": 1.1152034778408405e-05, "loss": 0.3816, "step": 6434, "task_loss": 0.6264767050743103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36115631461143494, "epoch": 5.44, "learning_rate": 1.1145996860282574e-05, "loss": 0.3319, "step": 6435, "task_loss": 0.7419612407684326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2800523340702057, "epoch": 5.44, "learning_rate": 1.1139958942156745e-05, "loss": 0.3019, "step": 6436, "task_loss": 0.543764054775238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2960984408855438, "epoch": 5.44, "learning_rate": 1.1133921024030915e-05, "loss": 0.3608, "step": 6437, "task_loss": 0.698871374130249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.13173988461494446, "epoch": 5.44, "learning_rate": 1.1127883105905084e-05, "loss": 0.3243, "step": 6438, "task_loss": 0.026658644899725914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34444254636764526, "epoch": 5.44, "learning_rate": 1.1121845187779255e-05, "loss": 0.3799, "step": 6439, "task_loss": 0.09817514568567276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38399645686149597, "epoch": 5.44, "learning_rate": 1.1115807269653424e-05, "loss": 0.3317, "step": 6440, "task_loss": 0.5333911180496216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.290145605802536, "epoch": 5.44, "learning_rate": 1.1109769351527594e-05, "loss": 0.3461, "step": 6441, "task_loss": 1.399024486541748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3978484272956848, "epoch": 5.45, "learning_rate": 1.1103731433401763e-05, "loss": 0.3333, "step": 6442, "task_loss": 1.0191444158554077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44078728556632996, "epoch": 5.45, "learning_rate": 1.1097693515275934e-05, "loss": 0.4112, "step": 6443, "task_loss": 0.20034442842006683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33913084864616394, "epoch": 5.45, "learning_rate": 1.1091655597150104e-05, "loss": 0.3025, "step": 6444, "task_loss": 0.9649949073791504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3946729302406311, "epoch": 5.45, "learning_rate": 1.1085617679024273e-05, "loss": 0.4277, "step": 6445, "task_loss": 0.4904354214668274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1716299057006836, "epoch": 5.45, "learning_rate": 1.1079579760898442e-05, "loss": 0.3614, "step": 6446, "task_loss": 0.6903536319732666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4978010654449463, "epoch": 5.45, "learning_rate": 1.1073541842772613e-05, "loss": 0.4614, "step": 6447, "task_loss": 0.20190566778182983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2642395794391632, "epoch": 5.45, "learning_rate": 1.1067503924646783e-05, "loss": 0.3377, "step": 6448, "task_loss": 0.5707980394363403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35775160789489746, "epoch": 5.45, "learning_rate": 1.1061466006520952e-05, "loss": 0.4881, "step": 6449, "task_loss": 0.5772120952606201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.57925945520401, "epoch": 5.45, "learning_rate": 1.1055428088395121e-05, "loss": 0.3863, "step": 6450, "task_loss": 1.0456262826919556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3765580356121063, "epoch": 5.45, "learning_rate": 1.1049390170269291e-05, "loss": 0.3586, "step": 6451, "task_loss": 0.3912793695926666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2360905557870865, "epoch": 5.45, "learning_rate": 1.1043352252143462e-05, "loss": 0.2617, "step": 6452, "task_loss": 0.2659694254398346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16151882708072662, "epoch": 5.45, "learning_rate": 1.1037314334017631e-05, "loss": 0.279, "step": 6453, "task_loss": 0.9082026481628418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31746843457221985, "epoch": 5.46, "learning_rate": 1.1031276415891801e-05, "loss": 0.363, "step": 6454, "task_loss": 0.3456231951713562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46760284900665283, "epoch": 5.46, "learning_rate": 1.102523849776597e-05, "loss": 0.4596, "step": 6455, "task_loss": 0.4750438630580902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22991231083869934, "epoch": 5.46, "learning_rate": 1.1019200579640141e-05, "loss": 0.3524, "step": 6456, "task_loss": 0.6746044158935547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.532814621925354, "epoch": 5.46, "learning_rate": 1.101316266151431e-05, "loss": 0.3683, "step": 6457, "task_loss": 0.7073816061019897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5313459038734436, "epoch": 5.46, "learning_rate": 1.100712474338848e-05, "loss": 0.344, "step": 6458, "task_loss": 0.8208689093589783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27031657099723816, "epoch": 5.46, "learning_rate": 1.1001086825262651e-05, "loss": 0.358, "step": 6459, "task_loss": 0.3645097315311432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.438679963350296, "epoch": 5.46, "learning_rate": 1.099504890713682e-05, "loss": 0.4075, "step": 6460, "task_loss": 0.6856974363327026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28098559379577637, "epoch": 5.46, "learning_rate": 1.0989010989010989e-05, "loss": 0.3014, "step": 6461, "task_loss": 1.6346542835235596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18340666592121124, "epoch": 5.46, "learning_rate": 1.098297307088516e-05, "loss": 0.2585, "step": 6462, "task_loss": 0.13022379577159882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23711355030536652, "epoch": 5.46, "learning_rate": 1.097693515275933e-05, "loss": 0.2965, "step": 6463, "task_loss": 0.40872734785079956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29401707649230957, "epoch": 5.46, "learning_rate": 1.09708972346335e-05, "loss": 0.3913, "step": 6464, "task_loss": 0.9820622801780701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16179320216178894, "epoch": 5.46, "learning_rate": 1.0964859316507668e-05, "loss": 0.3002, "step": 6465, "task_loss": 1.2622770071029663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2492518126964569, "epoch": 5.47, "learning_rate": 1.0958821398381838e-05, "loss": 0.3022, "step": 6466, "task_loss": 0.2425045669078827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21373876929283142, "epoch": 5.47, "learning_rate": 1.0952783480256009e-05, "loss": 0.4386, "step": 6467, "task_loss": 1.3063602447509766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2697393000125885, "epoch": 5.47, "learning_rate": 1.094674556213018e-05, "loss": 0.2825, "step": 6468, "task_loss": 0.11640089005231857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5461252331733704, "epoch": 5.47, "learning_rate": 1.0940707644004348e-05, "loss": 0.429, "step": 6469, "task_loss": 0.9272728562355042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23511022329330444, "epoch": 5.47, "learning_rate": 1.0934669725878517e-05, "loss": 0.2774, "step": 6470, "task_loss": 0.13659122586250305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4089481830596924, "epoch": 5.47, "learning_rate": 1.0928631807752688e-05, "loss": 0.3883, "step": 6471, "task_loss": 0.7380985617637634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35446345806121826, "epoch": 5.47, "learning_rate": 1.0922593889626856e-05, "loss": 0.3771, "step": 6472, "task_loss": 0.44064071774482727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22515630722045898, "epoch": 5.47, "learning_rate": 1.0916555971501027e-05, "loss": 0.2235, "step": 6473, "task_loss": 1.1987643241882324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20604151487350464, "epoch": 5.47, "learning_rate": 1.0910518053375198e-05, "loss": 0.3277, "step": 6474, "task_loss": 0.9498670101165771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2517207860946655, "epoch": 5.47, "learning_rate": 1.0904480135249366e-05, "loss": 0.2998, "step": 6475, "task_loss": 0.45298323035240173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27831169962882996, "epoch": 5.47, "learning_rate": 1.0898442217123535e-05, "loss": 0.3351, "step": 6476, "task_loss": 0.7166332006454468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4635339379310608, "epoch": 5.47, "learning_rate": 1.0892404298997706e-05, "loss": 0.4435, "step": 6477, "task_loss": 0.5185782313346863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4543699324131012, "epoch": 5.48, "learning_rate": 1.0886366380871877e-05, "loss": 0.3731, "step": 6478, "task_loss": 0.7234900593757629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31964218616485596, "epoch": 5.48, "learning_rate": 1.0880328462746045e-05, "loss": 0.3065, "step": 6479, "task_loss": 0.259724885225296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21775320172309875, "epoch": 5.48, "learning_rate": 1.0874290544620214e-05, "loss": 0.3478, "step": 6480, "task_loss": 0.23127196729183197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29845476150512695, "epoch": 5.48, "learning_rate": 1.0868252626494385e-05, "loss": 0.319, "step": 6481, "task_loss": 0.7343119382858276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24467286467552185, "epoch": 5.48, "learning_rate": 1.0862214708368555e-05, "loss": 0.373, "step": 6482, "task_loss": 0.6678158640861511 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5240477323532104, "epoch": 5.48, "learning_rate": 1.0856176790242726e-05, "loss": 0.4145, "step": 6483, "task_loss": 1.4520933628082275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3729757070541382, "epoch": 5.48, "learning_rate": 1.0850138872116893e-05, "loss": 0.3435, "step": 6484, "task_loss": 0.8951902985572815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2952122986316681, "epoch": 5.48, "learning_rate": 1.0844100953991064e-05, "loss": 0.3221, "step": 6485, "task_loss": 0.9254170060157776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3813907504081726, "epoch": 5.48, "learning_rate": 1.0838063035865234e-05, "loss": 0.4361, "step": 6486, "task_loss": 0.25857192277908325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28959470987319946, "epoch": 5.48, "learning_rate": 1.0832025117739405e-05, "loss": 0.3602, "step": 6487, "task_loss": 0.8793122172355652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14238083362579346, "epoch": 5.48, "learning_rate": 1.0825987199613574e-05, "loss": 0.2846, "step": 6488, "task_loss": 0.018870312720537186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30012795329093933, "epoch": 5.48, "learning_rate": 1.0819949281487743e-05, "loss": 0.353, "step": 6489, "task_loss": 0.9852735996246338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3186298608779907, "epoch": 5.49, "learning_rate": 1.0813911363361913e-05, "loss": 0.2894, "step": 6490, "task_loss": 0.5966883301734924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25656333565711975, "epoch": 5.49, "learning_rate": 1.0807873445236084e-05, "loss": 0.4195, "step": 6491, "task_loss": 1.8856227397918701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2829701900482178, "epoch": 5.49, "learning_rate": 1.0801835527110253e-05, "loss": 0.3427, "step": 6492, "task_loss": 0.63701331615448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5909008979797363, "epoch": 5.49, "learning_rate": 1.0795797608984423e-05, "loss": 0.4627, "step": 6493, "task_loss": 0.6346316337585449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24688027799129486, "epoch": 5.49, "learning_rate": 1.0789759690858592e-05, "loss": 0.3549, "step": 6494, "task_loss": 0.6836630702018738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27473390102386475, "epoch": 5.49, "learning_rate": 1.0783721772732763e-05, "loss": 0.3296, "step": 6495, "task_loss": 0.3093140721321106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4049716293811798, "epoch": 5.49, "learning_rate": 1.0777683854606932e-05, "loss": 0.3864, "step": 6496, "task_loss": 1.1587985754013062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2661289572715759, "epoch": 5.49, "learning_rate": 1.0771645936481102e-05, "loss": 0.2826, "step": 6497, "task_loss": 0.26125624775886536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19013603031635284, "epoch": 5.49, "learning_rate": 1.0765608018355273e-05, "loss": 0.285, "step": 6498, "task_loss": 1.070955514907837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43496471643447876, "epoch": 5.49, "learning_rate": 1.0759570100229442e-05, "loss": 0.4301, "step": 6499, "task_loss": 1.2177095413208008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3199244737625122, "epoch": 5.49, "learning_rate": 1.075353218210361e-05, "loss": 0.3426, "step": 6500, "task_loss": 0.49256569147109985 }, { "epoch": 5.49, "eval_accuracy": 0.919920792079208, "eval_loss": 0.21469196677207947, "eval_runtime": 319.34, "eval_samples_per_second": 79.069, "eval_steps_per_second": 0.62, "step": 6500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19605864584445953, "epoch": 5.5, "learning_rate": 1.0747494263977781e-05, "loss": 0.2772, "step": 6501, "task_loss": 0.454904168844223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.417763352394104, "epoch": 5.5, "learning_rate": 1.0741456345851952e-05, "loss": 0.3336, "step": 6502, "task_loss": 0.7853571176528931 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21247412264347076, "epoch": 5.5, "learning_rate": 1.073541842772612e-05, "loss": 0.4098, "step": 6503, "task_loss": 0.21888747811317444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5036829113960266, "epoch": 5.5, "learning_rate": 1.072938050960029e-05, "loss": 0.3745, "step": 6504, "task_loss": 0.6078847050666809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3985949456691742, "epoch": 5.5, "learning_rate": 1.072334259147446e-05, "loss": 0.3783, "step": 6505, "task_loss": 0.39163002371788025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3291832208633423, "epoch": 5.5, "learning_rate": 1.071730467334863e-05, "loss": 0.3525, "step": 6506, "task_loss": 0.8737161755561829 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1843566745519638, "epoch": 5.5, "learning_rate": 1.07112667552228e-05, "loss": 0.3426, "step": 6507, "task_loss": 0.2503706216812134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3048703968524933, "epoch": 5.5, "learning_rate": 1.070522883709697e-05, "loss": 0.3001, "step": 6508, "task_loss": 0.26111263036727905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2240523397922516, "epoch": 5.5, "learning_rate": 1.0699190918971139e-05, "loss": 0.3503, "step": 6509, "task_loss": 0.30820855498313904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2577289342880249, "epoch": 5.5, "learning_rate": 1.069315300084531e-05, "loss": 0.3233, "step": 6510, "task_loss": 0.5046443343162537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20948244631290436, "epoch": 5.5, "learning_rate": 1.0687115082719478e-05, "loss": 0.3011, "step": 6511, "task_loss": 0.4291733205318451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26499685645103455, "epoch": 5.5, "learning_rate": 1.0681077164593649e-05, "loss": 0.3826, "step": 6512, "task_loss": 0.5443810224533081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33218705654144287, "epoch": 5.51, "learning_rate": 1.067503924646782e-05, "loss": 0.3632, "step": 6513, "task_loss": 0.618726909160614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17350101470947266, "epoch": 5.51, "learning_rate": 1.0669001328341988e-05, "loss": 0.3378, "step": 6514, "task_loss": 0.44018012285232544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.179562509059906, "epoch": 5.51, "learning_rate": 1.0662963410216157e-05, "loss": 0.2942, "step": 6515, "task_loss": 0.6275087594985962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2596556842327118, "epoch": 5.51, "learning_rate": 1.0656925492090328e-05, "loss": 0.2836, "step": 6516, "task_loss": 0.1054178923368454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30808985233306885, "epoch": 5.51, "learning_rate": 1.0650887573964498e-05, "loss": 0.3323, "step": 6517, "task_loss": 0.9578037858009338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4792945384979248, "epoch": 5.51, "learning_rate": 1.0644849655838667e-05, "loss": 0.3174, "step": 6518, "task_loss": 0.3412033021450043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.280415415763855, "epoch": 5.51, "learning_rate": 1.0638811737712836e-05, "loss": 0.3509, "step": 6519, "task_loss": 0.41552287340164185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2797308564186096, "epoch": 5.51, "learning_rate": 1.0632773819587007e-05, "loss": 0.304, "step": 6520, "task_loss": 0.6273269057273865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36931952834129333, "epoch": 5.51, "learning_rate": 1.0626735901461177e-05, "loss": 0.3767, "step": 6521, "task_loss": 0.6237069368362427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2714664041996002, "epoch": 5.51, "learning_rate": 1.0620697983335346e-05, "loss": 0.2873, "step": 6522, "task_loss": 0.501672625541687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25009262561798096, "epoch": 5.51, "learning_rate": 1.0614660065209517e-05, "loss": 0.3352, "step": 6523, "task_loss": 0.5692862868309021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2635250985622406, "epoch": 5.51, "learning_rate": 1.0608622147083686e-05, "loss": 0.4189, "step": 6524, "task_loss": 0.2898869812488556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4312386214733124, "epoch": 5.52, "learning_rate": 1.0602584228957856e-05, "loss": 0.3822, "step": 6525, "task_loss": 0.8468645811080933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2972198724746704, "epoch": 5.52, "learning_rate": 1.0596546310832025e-05, "loss": 0.2918, "step": 6526, "task_loss": 0.5356125235557556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25711217522621155, "epoch": 5.52, "learning_rate": 1.0590508392706196e-05, "loss": 0.3101, "step": 6527, "task_loss": 0.27613991498947144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5145947337150574, "epoch": 5.52, "learning_rate": 1.0584470474580366e-05, "loss": 0.3305, "step": 6528, "task_loss": 0.7047602534294128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3822306990623474, "epoch": 5.52, "learning_rate": 1.0578432556454535e-05, "loss": 0.3409, "step": 6529, "task_loss": 0.3323548138141632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4595053195953369, "epoch": 5.52, "learning_rate": 1.0572394638328704e-05, "loss": 0.3539, "step": 6530, "task_loss": 0.9265221357345581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4153057932853699, "epoch": 5.52, "learning_rate": 1.0566356720202874e-05, "loss": 0.4093, "step": 6531, "task_loss": 1.06119966506958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1861926019191742, "epoch": 5.52, "learning_rate": 1.0560318802077045e-05, "loss": 0.2479, "step": 6532, "task_loss": 0.4757814407348633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32598021626472473, "epoch": 5.52, "learning_rate": 1.0554280883951216e-05, "loss": 0.3194, "step": 6533, "task_loss": 1.2440168857574463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19668327271938324, "epoch": 5.52, "learning_rate": 1.0548242965825383e-05, "loss": 0.2618, "step": 6534, "task_loss": 0.02481669746339321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5262462496757507, "epoch": 5.52, "learning_rate": 1.0542205047699553e-05, "loss": 0.3992, "step": 6535, "task_loss": 0.6809242367744446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21570110321044922, "epoch": 5.52, "learning_rate": 1.0536167129573724e-05, "loss": 0.3899, "step": 6536, "task_loss": 0.41723117232322693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2836676239967346, "epoch": 5.53, "learning_rate": 1.0530129211447893e-05, "loss": 0.2703, "step": 6537, "task_loss": 0.3860211670398712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3070584535598755, "epoch": 5.53, "learning_rate": 1.0524091293322063e-05, "loss": 0.3837, "step": 6538, "task_loss": 0.8222426772117615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26056692004203796, "epoch": 5.53, "learning_rate": 1.0518053375196232e-05, "loss": 0.3186, "step": 6539, "task_loss": 0.4425794780254364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5036625862121582, "epoch": 5.53, "learning_rate": 1.0512015457070403e-05, "loss": 0.3198, "step": 6540, "task_loss": 0.8016558885574341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3701319694519043, "epoch": 5.53, "learning_rate": 1.0505977538944572e-05, "loss": 0.3932, "step": 6541, "task_loss": 0.9214180707931519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31887495517730713, "epoch": 5.53, "learning_rate": 1.0499939620818742e-05, "loss": 0.2723, "step": 6542, "task_loss": 0.17772407829761505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26150307059288025, "epoch": 5.53, "learning_rate": 1.0493901702692913e-05, "loss": 0.3066, "step": 6543, "task_loss": 0.26097145676612854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4437938928604126, "epoch": 5.53, "learning_rate": 1.0487863784567082e-05, "loss": 0.3647, "step": 6544, "task_loss": 0.9022730588912964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22103744745254517, "epoch": 5.53, "learning_rate": 1.048182586644125e-05, "loss": 0.2879, "step": 6545, "task_loss": 0.7111725211143494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2651381194591522, "epoch": 5.53, "learning_rate": 1.0475787948315421e-05, "loss": 0.3508, "step": 6546, "task_loss": 1.355660080909729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1256418526172638, "epoch": 5.53, "learning_rate": 1.0469750030189592e-05, "loss": 0.3845, "step": 6547, "task_loss": 0.299135684967041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20779091119766235, "epoch": 5.53, "learning_rate": 1.0463712112063762e-05, "loss": 0.2912, "step": 6548, "task_loss": 0.05434903874993324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4354076683521271, "epoch": 5.54, "learning_rate": 1.045767419393793e-05, "loss": 0.3924, "step": 6549, "task_loss": 1.020510196685791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3806000351905823, "epoch": 5.54, "learning_rate": 1.04516362758121e-05, "loss": 0.3973, "step": 6550, "task_loss": 1.5078545808792114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39202332496643066, "epoch": 5.54, "learning_rate": 1.044559835768627e-05, "loss": 0.2923, "step": 6551, "task_loss": 1.3262795209884644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.13660737872123718, "epoch": 5.54, "learning_rate": 1.0439560439560441e-05, "loss": 0.2157, "step": 6552, "task_loss": 0.5390368700027466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3211098313331604, "epoch": 5.54, "learning_rate": 1.043352252143461e-05, "loss": 0.4042, "step": 6553, "task_loss": 0.45373618602752686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3558329939842224, "epoch": 5.54, "learning_rate": 1.0427484603308779e-05, "loss": 0.3474, "step": 6554, "task_loss": 0.39932653307914734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3171035349369049, "epoch": 5.54, "learning_rate": 1.042144668518295e-05, "loss": 0.3297, "step": 6555, "task_loss": 0.8790751099586487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17677758634090424, "epoch": 5.54, "learning_rate": 1.041540876705712e-05, "loss": 0.3652, "step": 6556, "task_loss": 0.38963446021080017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2527502179145813, "epoch": 5.54, "learning_rate": 1.0409370848931289e-05, "loss": 0.2963, "step": 6557, "task_loss": 0.8474727272987366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.380832314491272, "epoch": 5.54, "learning_rate": 1.040333293080546e-05, "loss": 0.3786, "step": 6558, "task_loss": 0.5224317908287048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32252180576324463, "epoch": 5.54, "learning_rate": 1.0397295012679628e-05, "loss": 0.3227, "step": 6559, "task_loss": 0.5938923954963684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36108630895614624, "epoch": 5.54, "learning_rate": 1.0391257094553799e-05, "loss": 0.2973, "step": 6560, "task_loss": 0.6496260762214661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3700859546661377, "epoch": 5.55, "learning_rate": 1.0385219176427968e-05, "loss": 0.3579, "step": 6561, "task_loss": 0.4780781865119934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17350687086582184, "epoch": 5.55, "learning_rate": 1.0379181258302138e-05, "loss": 0.2123, "step": 6562, "task_loss": 0.10712534934282303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25922712683677673, "epoch": 5.55, "learning_rate": 1.0373143340176309e-05, "loss": 0.2954, "step": 6563, "task_loss": 0.5914672613143921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3715321719646454, "epoch": 5.55, "learning_rate": 1.0367105422050478e-05, "loss": 0.411, "step": 6564, "task_loss": 0.190865159034729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25570064783096313, "epoch": 5.55, "learning_rate": 1.0361067503924647e-05, "loss": 0.2983, "step": 6565, "task_loss": 1.411649465560913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44901710748672485, "epoch": 5.55, "learning_rate": 1.0355029585798817e-05, "loss": 0.3029, "step": 6566, "task_loss": 0.636223316192627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36977511644363403, "epoch": 5.55, "learning_rate": 1.0348991667672988e-05, "loss": 0.3319, "step": 6567, "task_loss": 1.0609616041183472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4175705015659332, "epoch": 5.55, "learning_rate": 1.0342953749547157e-05, "loss": 0.3791, "step": 6568, "task_loss": 0.6628451347351074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28261351585388184, "epoch": 5.55, "learning_rate": 1.0336915831421326e-05, "loss": 0.3672, "step": 6569, "task_loss": 0.09270396828651428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4044901132583618, "epoch": 5.55, "learning_rate": 1.0330877913295496e-05, "loss": 0.2921, "step": 6570, "task_loss": 1.2493391036987305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3853190243244171, "epoch": 5.55, "learning_rate": 1.0324839995169667e-05, "loss": 0.4581, "step": 6571, "task_loss": 1.317997694015503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28458163142204285, "epoch": 5.56, "learning_rate": 1.0318802077043836e-05, "loss": 0.3012, "step": 6572, "task_loss": 1.4752025604248047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3440338969230652, "epoch": 5.56, "learning_rate": 1.0312764158918005e-05, "loss": 0.3753, "step": 6573, "task_loss": 0.22700409591197968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29600754380226135, "epoch": 5.56, "learning_rate": 1.0306726240792175e-05, "loss": 0.2948, "step": 6574, "task_loss": 0.5498785972595215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2633250951766968, "epoch": 5.56, "learning_rate": 1.0300688322666346e-05, "loss": 0.3533, "step": 6575, "task_loss": 0.7242979407310486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23963522911071777, "epoch": 5.56, "learning_rate": 1.0294650404540515e-05, "loss": 0.3638, "step": 6576, "task_loss": 0.11894496530294418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2620319724082947, "epoch": 5.56, "learning_rate": 1.0288612486414685e-05, "loss": 0.3185, "step": 6577, "task_loss": 0.23133043944835663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22256460785865784, "epoch": 5.56, "learning_rate": 1.0282574568288854e-05, "loss": 0.2821, "step": 6578, "task_loss": 0.6564161777496338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2459985613822937, "epoch": 5.56, "learning_rate": 1.0276536650163025e-05, "loss": 0.3227, "step": 6579, "task_loss": 0.6598586440086365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2363634705543518, "epoch": 5.56, "learning_rate": 1.0270498732037193e-05, "loss": 0.261, "step": 6580, "task_loss": 0.3682951033115387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2551322877407074, "epoch": 5.56, "learning_rate": 1.0264460813911364e-05, "loss": 0.2726, "step": 6581, "task_loss": 0.29483234882354736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24961140751838684, "epoch": 5.56, "learning_rate": 1.0258422895785535e-05, "loss": 0.283, "step": 6582, "task_loss": 0.6793506741523743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27142009139060974, "epoch": 5.56, "learning_rate": 1.0252384977659703e-05, "loss": 0.3444, "step": 6583, "task_loss": 0.8120901584625244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22374442219734192, "epoch": 5.57, "learning_rate": 1.0246347059533872e-05, "loss": 0.2795, "step": 6584, "task_loss": 0.09725237637758255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31882625818252563, "epoch": 5.57, "learning_rate": 1.0240309141408043e-05, "loss": 0.3164, "step": 6585, "task_loss": 1.3605471849441528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2790541350841522, "epoch": 5.57, "learning_rate": 1.0234271223282213e-05, "loss": 0.2908, "step": 6586, "task_loss": 0.532240092754364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21717298030853271, "epoch": 5.57, "learning_rate": 1.0228233305156382e-05, "loss": 0.2403, "step": 6587, "task_loss": 0.06855680048465729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23580777645111084, "epoch": 5.57, "learning_rate": 1.0222195387030551e-05, "loss": 0.2753, "step": 6588, "task_loss": 0.4201302230358124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23298566043376923, "epoch": 5.57, "learning_rate": 1.0216157468904722e-05, "loss": 0.3444, "step": 6589, "task_loss": 0.5117650628089905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30784064531326294, "epoch": 5.57, "learning_rate": 1.0210119550778892e-05, "loss": 0.3219, "step": 6590, "task_loss": 0.6316930651664734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34325703978538513, "epoch": 5.57, "learning_rate": 1.0204081632653061e-05, "loss": 0.3021, "step": 6591, "task_loss": 0.35452041029930115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.353581964969635, "epoch": 5.57, "learning_rate": 1.0198043714527232e-05, "loss": 0.3095, "step": 6592, "task_loss": 0.48669952154159546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3249625563621521, "epoch": 5.57, "learning_rate": 1.01920057964014e-05, "loss": 0.3658, "step": 6593, "task_loss": 0.7602303624153137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21215477585792542, "epoch": 5.57, "learning_rate": 1.0185967878275571e-05, "loss": 0.2814, "step": 6594, "task_loss": 0.4305051267147064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3274220824241638, "epoch": 5.57, "learning_rate": 1.017992996014974e-05, "loss": 0.3565, "step": 6595, "task_loss": 0.10835804045200348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3006184697151184, "epoch": 5.58, "learning_rate": 1.017389204202391e-05, "loss": 0.378, "step": 6596, "task_loss": 0.6692013740539551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21366573870182037, "epoch": 5.58, "learning_rate": 1.0167854123898081e-05, "loss": 0.357, "step": 6597, "task_loss": 0.447931170463562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22452689707279205, "epoch": 5.58, "learning_rate": 1.016181620577225e-05, "loss": 0.3885, "step": 6598, "task_loss": 0.7681757807731628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2437310516834259, "epoch": 5.58, "learning_rate": 1.0155778287646419e-05, "loss": 0.2517, "step": 6599, "task_loss": 0.16719314455986023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2297041416168213, "epoch": 5.58, "learning_rate": 1.014974036952059e-05, "loss": 0.2219, "step": 6600, "task_loss": 0.6251035332679749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39838868379592896, "epoch": 5.58, "learning_rate": 1.014370245139476e-05, "loss": 0.451, "step": 6601, "task_loss": 1.203598141670227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32569634914398193, "epoch": 5.58, "learning_rate": 1.0137664533268929e-05, "loss": 0.2756, "step": 6602, "task_loss": 0.320221483707428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45050328969955444, "epoch": 5.58, "learning_rate": 1.0131626615143098e-05, "loss": 0.3811, "step": 6603, "task_loss": 1.541385531425476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21052812039852142, "epoch": 5.58, "learning_rate": 1.0125588697017269e-05, "loss": 0.2849, "step": 6604, "task_loss": 0.3560037910938263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44232141971588135, "epoch": 5.58, "learning_rate": 1.0119550778891439e-05, "loss": 0.3859, "step": 6605, "task_loss": 1.0662153959274292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20478521287441254, "epoch": 5.58, "learning_rate": 1.0113512860765608e-05, "loss": 0.3324, "step": 6606, "task_loss": 0.1562296748161316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26327094435691833, "epoch": 5.58, "learning_rate": 1.0107474942639779e-05, "loss": 0.3333, "step": 6607, "task_loss": 0.20151281356811523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30611905455589294, "epoch": 5.59, "learning_rate": 1.0101437024513947e-05, "loss": 0.4132, "step": 6608, "task_loss": 0.31689703464508057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26855289936065674, "epoch": 5.59, "learning_rate": 1.0095399106388118e-05, "loss": 0.4163, "step": 6609, "task_loss": 1.370315432548523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44461125135421753, "epoch": 5.59, "learning_rate": 1.0089361188262287e-05, "loss": 0.322, "step": 6610, "task_loss": 0.9757928252220154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.13596820831298828, "epoch": 5.59, "learning_rate": 1.0083323270136457e-05, "loss": 0.2947, "step": 6611, "task_loss": 1.787024974822998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2562367618083954, "epoch": 5.59, "learning_rate": 1.0077285352010628e-05, "loss": 0.3124, "step": 6612, "task_loss": 0.3024076819419861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21355319023132324, "epoch": 5.59, "learning_rate": 1.0071247433884797e-05, "loss": 0.2992, "step": 6613, "task_loss": 0.5922558903694153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2877032160758972, "epoch": 5.59, "learning_rate": 1.0065209515758966e-05, "loss": 0.3151, "step": 6614, "task_loss": 0.5156304836273193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25829169154167175, "epoch": 5.59, "learning_rate": 1.0059171597633136e-05, "loss": 0.2987, "step": 6615, "task_loss": 0.8858876824378967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1903683841228485, "epoch": 5.59, "learning_rate": 1.0053133679507307e-05, "loss": 0.2537, "step": 6616, "task_loss": 0.8741224408149719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36677587032318115, "epoch": 5.59, "learning_rate": 1.0047095761381477e-05, "loss": 0.312, "step": 6617, "task_loss": 0.1604508012533188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16676542162895203, "epoch": 5.59, "learning_rate": 1.0041057843255645e-05, "loss": 0.2787, "step": 6618, "task_loss": 0.14459198713302612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2876531779766083, "epoch": 5.59, "learning_rate": 1.0035019925129815e-05, "loss": 0.3446, "step": 6619, "task_loss": 1.3382811546325684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2614789307117462, "epoch": 5.6, "learning_rate": 1.0028982007003986e-05, "loss": 0.2929, "step": 6620, "task_loss": 0.5848307013511658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40701302886009216, "epoch": 5.6, "learning_rate": 1.0022944088878156e-05, "loss": 0.3658, "step": 6621, "task_loss": 0.34466877579689026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36441197991371155, "epoch": 5.6, "learning_rate": 1.0016906170752325e-05, "loss": 0.4037, "step": 6622, "task_loss": 1.0667511224746704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3785821795463562, "epoch": 5.6, "learning_rate": 1.0010868252626494e-05, "loss": 0.2961, "step": 6623, "task_loss": 0.9392054080963135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.551764965057373, "epoch": 5.6, "learning_rate": 1.0004830334500665e-05, "loss": 0.4219, "step": 6624, "task_loss": 1.0309998989105225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22546285390853882, "epoch": 5.6, "learning_rate": 9.998792416374835e-06, "loss": 0.3477, "step": 6625, "task_loss": 0.009312462992966175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19731470942497253, "epoch": 5.6, "learning_rate": 9.992754498249004e-06, "loss": 0.2773, "step": 6626, "task_loss": 0.27100905776023865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3056289255619049, "epoch": 5.6, "learning_rate": 9.986716580123175e-06, "loss": 0.2578, "step": 6627, "task_loss": 0.9074018597602844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19171541929244995, "epoch": 5.6, "learning_rate": 9.980678661997344e-06, "loss": 0.2966, "step": 6628, "task_loss": 0.28693556785583496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49298495054244995, "epoch": 5.6, "learning_rate": 9.974640743871514e-06, "loss": 0.4226, "step": 6629, "task_loss": 0.6408815383911133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15026484429836273, "epoch": 5.6, "learning_rate": 9.968602825745683e-06, "loss": 0.3427, "step": 6630, "task_loss": 0.31861525774002075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3172023296356201, "epoch": 5.6, "learning_rate": 9.962564907619854e-06, "loss": 0.4151, "step": 6631, "task_loss": 1.4805562496185303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3523207902908325, "epoch": 5.61, "learning_rate": 9.956526989494024e-06, "loss": 0.3216, "step": 6632, "task_loss": 0.16999110579490662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1682361215353012, "epoch": 5.61, "learning_rate": 9.950489071368193e-06, "loss": 0.4581, "step": 6633, "task_loss": 0.630717933177948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26007193326950073, "epoch": 5.61, "learning_rate": 9.944451153242362e-06, "loss": 0.3658, "step": 6634, "task_loss": 0.22312162816524506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24310968816280365, "epoch": 5.61, "learning_rate": 9.938413235116533e-06, "loss": 0.3184, "step": 6635, "task_loss": 0.09064095467329025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41085612773895264, "epoch": 5.61, "learning_rate": 9.932375316990703e-06, "loss": 0.4677, "step": 6636, "task_loss": 0.8581368923187256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22768741846084595, "epoch": 5.61, "learning_rate": 9.926337398864872e-06, "loss": 0.3002, "step": 6637, "task_loss": 0.2018507570028305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3178390860557556, "epoch": 5.61, "learning_rate": 9.92029948073904e-06, "loss": 0.3298, "step": 6638, "task_loss": 0.7272287607192993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2403346598148346, "epoch": 5.61, "learning_rate": 9.914261562613211e-06, "loss": 0.3548, "step": 6639, "task_loss": 0.09424266964197159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2942059636116028, "epoch": 5.61, "learning_rate": 9.908223644487382e-06, "loss": 0.4104, "step": 6640, "task_loss": 0.39094278216362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23487915098667145, "epoch": 5.61, "learning_rate": 9.902185726361551e-06, "loss": 0.3899, "step": 6641, "task_loss": 0.4250715374946594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15083220601081848, "epoch": 5.61, "learning_rate": 9.896147808235721e-06, "loss": 0.4039, "step": 6642, "task_loss": 0.3072262406349182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27267032861709595, "epoch": 5.61, "learning_rate": 9.89010989010989e-06, "loss": 0.367, "step": 6643, "task_loss": 0.3982740640640259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25426122546195984, "epoch": 5.62, "learning_rate": 9.884071971984061e-06, "loss": 0.2974, "step": 6644, "task_loss": 0.0919603556394577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41948825120925903, "epoch": 5.62, "learning_rate": 9.87803405385823e-06, "loss": 0.4264, "step": 6645, "task_loss": 0.5023536682128906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27732914686203003, "epoch": 5.62, "learning_rate": 9.8719961357324e-06, "loss": 0.2784, "step": 6646, "task_loss": 0.36785688996315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2790338397026062, "epoch": 5.62, "learning_rate": 9.865958217606571e-06, "loss": 0.3268, "step": 6647, "task_loss": 1.2957967519760132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2502809464931488, "epoch": 5.62, "learning_rate": 9.85992029948074e-06, "loss": 0.2888, "step": 6648, "task_loss": 0.13503558933734894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32370227575302124, "epoch": 5.62, "learning_rate": 9.853882381354909e-06, "loss": 0.2739, "step": 6649, "task_loss": 0.47477930784225464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2908785939216614, "epoch": 5.62, "learning_rate": 9.84784446322908e-06, "loss": 0.3006, "step": 6650, "task_loss": 0.5614737868309021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4035569727420807, "epoch": 5.62, "learning_rate": 9.84180654510325e-06, "loss": 0.328, "step": 6651, "task_loss": 0.6102346181869507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3547568917274475, "epoch": 5.62, "learning_rate": 9.835768626977419e-06, "loss": 0.2989, "step": 6652, "task_loss": 0.6428453922271729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27552032470703125, "epoch": 5.62, "learning_rate": 9.829730708851588e-06, "loss": 0.3284, "step": 6653, "task_loss": 0.8723853826522827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2234157770872116, "epoch": 5.62, "learning_rate": 9.823692790725758e-06, "loss": 0.2854, "step": 6654, "task_loss": 0.6686185002326965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.530231237411499, "epoch": 5.63, "learning_rate": 9.817654872599929e-06, "loss": 0.379, "step": 6655, "task_loss": 1.197025179862976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4148868918418884, "epoch": 5.63, "learning_rate": 9.811616954474098e-06, "loss": 0.3152, "step": 6656, "task_loss": 0.7059207558631897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40065455436706543, "epoch": 5.63, "learning_rate": 9.805579036348266e-06, "loss": 0.3759, "step": 6657, "task_loss": 0.5633928775787354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4352220296859741, "epoch": 5.63, "learning_rate": 9.799541118222437e-06, "loss": 0.3941, "step": 6658, "task_loss": 0.3462733328342438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.09465480595827103, "epoch": 5.63, "learning_rate": 9.793503200096608e-06, "loss": 0.2088, "step": 6659, "task_loss": 0.08361466228961945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4917945861816406, "epoch": 5.63, "learning_rate": 9.787465281970776e-06, "loss": 0.4306, "step": 6660, "task_loss": 1.4461658000946045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19527994096279144, "epoch": 5.63, "learning_rate": 9.781427363844947e-06, "loss": 0.2273, "step": 6661, "task_loss": 0.5550509691238403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3901141583919525, "epoch": 5.63, "learning_rate": 9.775389445719116e-06, "loss": 0.3437, "step": 6662, "task_loss": 1.3549866676330566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.219003826379776, "epoch": 5.63, "learning_rate": 9.769351527593286e-06, "loss": 0.2172, "step": 6663, "task_loss": 0.6099364757537842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1981227993965149, "epoch": 5.63, "learning_rate": 9.763313609467455e-06, "loss": 0.3362, "step": 6664, "task_loss": 1.110323190689087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45371031761169434, "epoch": 5.63, "learning_rate": 9.757275691341626e-06, "loss": 0.4308, "step": 6665, "task_loss": 0.6126262545585632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22074496746063232, "epoch": 5.63, "learning_rate": 9.751237773215796e-06, "loss": 0.3059, "step": 6666, "task_loss": 0.4106799066066742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22257693111896515, "epoch": 5.64, "learning_rate": 9.745199855089965e-06, "loss": 0.3353, "step": 6667, "task_loss": 0.9288924932479858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27359193563461304, "epoch": 5.64, "learning_rate": 9.739161936964134e-06, "loss": 0.3131, "step": 6668, "task_loss": 0.2709417939186096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23967401683330536, "epoch": 5.64, "learning_rate": 9.733124018838305e-06, "loss": 0.2715, "step": 6669, "task_loss": 0.4728105366230011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28476187586784363, "epoch": 5.64, "learning_rate": 9.727086100712475e-06, "loss": 0.3985, "step": 6670, "task_loss": 1.1641244888305664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1994425654411316, "epoch": 5.64, "learning_rate": 9.721048182586644e-06, "loss": 0.3192, "step": 6671, "task_loss": 0.4683416187763214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35842248797416687, "epoch": 5.64, "learning_rate": 9.715010264460813e-06, "loss": 0.3481, "step": 6672, "task_loss": 0.4807520806789398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2918809950351715, "epoch": 5.64, "learning_rate": 9.708972346334984e-06, "loss": 0.3179, "step": 6673, "task_loss": 1.0348349809646606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19487132132053375, "epoch": 5.64, "learning_rate": 9.702934428209154e-06, "loss": 0.3511, "step": 6674, "task_loss": 1.0921744108200073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3358905017375946, "epoch": 5.64, "learning_rate": 9.696896510083323e-06, "loss": 0.3065, "step": 6675, "task_loss": 0.27514687180519104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35728585720062256, "epoch": 5.64, "learning_rate": 9.690858591957494e-06, "loss": 0.3371, "step": 6676, "task_loss": 0.5846573710441589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26731789112091064, "epoch": 5.64, "learning_rate": 9.684820673831663e-06, "loss": 0.3732, "step": 6677, "task_loss": 0.8093464970588684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3725751042366028, "epoch": 5.64, "learning_rate": 9.678782755705833e-06, "loss": 0.3756, "step": 6678, "task_loss": 0.4602545201778412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23934581875801086, "epoch": 5.65, "learning_rate": 9.672744837580002e-06, "loss": 0.2857, "step": 6679, "task_loss": 0.21866871416568756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.247604101896286, "epoch": 5.65, "learning_rate": 9.666706919454173e-06, "loss": 0.3565, "step": 6680, "task_loss": 0.6279710531234741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3483777940273285, "epoch": 5.65, "learning_rate": 9.660669001328343e-06, "loss": 0.2892, "step": 6681, "task_loss": 0.1212848424911499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2071179449558258, "epoch": 5.65, "learning_rate": 9.654631083202512e-06, "loss": 0.3154, "step": 6682, "task_loss": 0.7638772130012512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3858543038368225, "epoch": 5.65, "learning_rate": 9.648593165076681e-06, "loss": 0.3715, "step": 6683, "task_loss": 0.24035951495170593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16877201199531555, "epoch": 5.65, "learning_rate": 9.642555246950852e-06, "loss": 0.3126, "step": 6684, "task_loss": 0.3937308192253113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3020656704902649, "epoch": 5.65, "learning_rate": 9.636517328825022e-06, "loss": 0.2406, "step": 6685, "task_loss": 0.7210238575935364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34867820143699646, "epoch": 5.65, "learning_rate": 9.630479410699193e-06, "loss": 0.323, "step": 6686, "task_loss": 0.7599949836730957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22977223992347717, "epoch": 5.65, "learning_rate": 9.62444149257336e-06, "loss": 0.2645, "step": 6687, "task_loss": 0.26162874698638916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43113046884536743, "epoch": 5.65, "learning_rate": 9.61840357444753e-06, "loss": 0.3358, "step": 6688, "task_loss": 0.6177462935447693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2841758131980896, "epoch": 5.65, "learning_rate": 9.612365656321701e-06, "loss": 0.2962, "step": 6689, "task_loss": 0.5701577663421631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2760854959487915, "epoch": 5.65, "learning_rate": 9.606327738195872e-06, "loss": 0.2671, "step": 6690, "task_loss": 0.5382108092308044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31812548637390137, "epoch": 5.66, "learning_rate": 9.60028982007004e-06, "loss": 0.4377, "step": 6691, "task_loss": 0.9980971217155457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21837246417999268, "epoch": 5.66, "learning_rate": 9.59425190194421e-06, "loss": 0.3034, "step": 6692, "task_loss": 1.2985215187072754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25443923473358154, "epoch": 5.66, "learning_rate": 9.58821398381838e-06, "loss": 0.365, "step": 6693, "task_loss": 0.3209340572357178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2780131995677948, "epoch": 5.66, "learning_rate": 9.58217606569255e-06, "loss": 0.3563, "step": 6694, "task_loss": 0.8807173371315002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3336862623691559, "epoch": 5.66, "learning_rate": 9.57613814756672e-06, "loss": 0.4978, "step": 6695, "task_loss": 1.2240161895751953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29239898920059204, "epoch": 5.66, "learning_rate": 9.57010022944089e-06, "loss": 0.2988, "step": 6696, "task_loss": 0.38459235429763794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3391895890235901, "epoch": 5.66, "learning_rate": 9.564062311315059e-06, "loss": 0.339, "step": 6697, "task_loss": 0.699405312538147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2498028427362442, "epoch": 5.66, "learning_rate": 9.55802439318923e-06, "loss": 0.329, "step": 6698, "task_loss": 0.4092022180557251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26450836658477783, "epoch": 5.66, "learning_rate": 9.551986475063398e-06, "loss": 0.2745, "step": 6699, "task_loss": 0.38868454098701477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3599855303764343, "epoch": 5.66, "learning_rate": 9.545948556937569e-06, "loss": 0.3633, "step": 6700, "task_loss": 1.210742712020874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3716253638267517, "epoch": 5.66, "learning_rate": 9.53991063881174e-06, "loss": 0.3131, "step": 6701, "task_loss": 0.6585986614227295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34051573276519775, "epoch": 5.66, "learning_rate": 9.533872720685908e-06, "loss": 0.3077, "step": 6702, "task_loss": 0.9658828973770142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24935531616210938, "epoch": 5.67, "learning_rate": 9.527834802560077e-06, "loss": 0.3114, "step": 6703, "task_loss": 0.5348849296569824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3399733901023865, "epoch": 5.67, "learning_rate": 9.521796884434248e-06, "loss": 0.3596, "step": 6704, "task_loss": 0.9043667912483215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3496256470680237, "epoch": 5.67, "learning_rate": 9.515758966308418e-06, "loss": 0.3751, "step": 6705, "task_loss": 0.9980213642120361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4732043743133545, "epoch": 5.67, "learning_rate": 9.509721048182587e-06, "loss": 0.3476, "step": 6706, "task_loss": 0.19282160699367523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3645224869251251, "epoch": 5.67, "learning_rate": 9.503683130056756e-06, "loss": 0.3091, "step": 6707, "task_loss": 0.7697629332542419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27056795358657837, "epoch": 5.67, "learning_rate": 9.497645211930927e-06, "loss": 0.3818, "step": 6708, "task_loss": 1.225403904914856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20778575539588928, "epoch": 5.67, "learning_rate": 9.491607293805097e-06, "loss": 0.2822, "step": 6709, "task_loss": 0.11061453819274902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18221516907215118, "epoch": 5.67, "learning_rate": 9.485569375679266e-06, "loss": 0.3757, "step": 6710, "task_loss": 0.5422356724739075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2920825481414795, "epoch": 5.67, "learning_rate": 9.479531457553437e-06, "loss": 0.2972, "step": 6711, "task_loss": 0.43659284710884094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2830583453178406, "epoch": 5.67, "learning_rate": 9.473493539427606e-06, "loss": 0.3012, "step": 6712, "task_loss": 0.6122891902923584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17818909883499146, "epoch": 5.67, "learning_rate": 9.467455621301776e-06, "loss": 0.3209, "step": 6713, "task_loss": 0.2318684309720993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2694448232650757, "epoch": 5.67, "learning_rate": 9.461417703175945e-06, "loss": 0.2541, "step": 6714, "task_loss": 0.33924224972724915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20413166284561157, "epoch": 5.68, "learning_rate": 9.455379785050116e-06, "loss": 0.3385, "step": 6715, "task_loss": 0.846082329750061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20855392515659332, "epoch": 5.68, "learning_rate": 9.449341866924286e-06, "loss": 0.2259, "step": 6716, "task_loss": 0.25663742423057556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22986873984336853, "epoch": 5.68, "learning_rate": 9.443303948798455e-06, "loss": 0.3753, "step": 6717, "task_loss": 0.9498790502548218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36486148834228516, "epoch": 5.68, "learning_rate": 9.437266030672624e-06, "loss": 0.3647, "step": 6718, "task_loss": 1.4184638261795044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2073749601840973, "epoch": 5.68, "learning_rate": 9.431228112546794e-06, "loss": 0.3417, "step": 6719, "task_loss": 0.03551316633820534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2253510057926178, "epoch": 5.68, "learning_rate": 9.425190194420965e-06, "loss": 0.3538, "step": 6720, "task_loss": 1.1192268133163452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47966235876083374, "epoch": 5.68, "learning_rate": 9.419152276295134e-06, "loss": 0.3837, "step": 6721, "task_loss": 0.4770834743976593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17224150896072388, "epoch": 5.68, "learning_rate": 9.413114358169303e-06, "loss": 0.3675, "step": 6722, "task_loss": 0.2319435477256775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5343066453933716, "epoch": 5.68, "learning_rate": 9.407076440043473e-06, "loss": 0.4649, "step": 6723, "task_loss": 1.323258876800537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25592654943466187, "epoch": 5.68, "learning_rate": 9.401038521917644e-06, "loss": 0.411, "step": 6724, "task_loss": 0.6037693619728088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3307601809501648, "epoch": 5.68, "learning_rate": 9.395000603791813e-06, "loss": 0.3267, "step": 6725, "task_loss": 0.5927876830101013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2726568281650543, "epoch": 5.69, "learning_rate": 9.388962685665983e-06, "loss": 0.3372, "step": 6726, "task_loss": 0.632868766784668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32914626598358154, "epoch": 5.69, "learning_rate": 9.382924767540152e-06, "loss": 0.3599, "step": 6727, "task_loss": 0.8113178610801697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4490359425544739, "epoch": 5.69, "learning_rate": 9.376886849414323e-06, "loss": 0.3978, "step": 6728, "task_loss": 0.6970449090003967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25868669152259827, "epoch": 5.69, "learning_rate": 9.370848931288492e-06, "loss": 0.3718, "step": 6729, "task_loss": 0.3481305241584778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2483588606119156, "epoch": 5.69, "learning_rate": 9.364811013162662e-06, "loss": 0.1962, "step": 6730, "task_loss": 0.31206655502319336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22083695232868195, "epoch": 5.69, "learning_rate": 9.358773095036833e-06, "loss": 0.3842, "step": 6731, "task_loss": 0.8413810729980469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21256330609321594, "epoch": 5.69, "learning_rate": 9.352735176911002e-06, "loss": 0.3025, "step": 6732, "task_loss": 0.7703561782836914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2053307592868805, "epoch": 5.69, "learning_rate": 9.34669725878517e-06, "loss": 0.3288, "step": 6733, "task_loss": 0.8859851956367493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2902398705482483, "epoch": 5.69, "learning_rate": 9.340659340659341e-06, "loss": 0.3422, "step": 6734, "task_loss": 0.8825451135635376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36363911628723145, "epoch": 5.69, "learning_rate": 9.334621422533512e-06, "loss": 0.3489, "step": 6735, "task_loss": 0.2889043986797333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2659076452255249, "epoch": 5.69, "learning_rate": 9.32858350440768e-06, "loss": 0.3996, "step": 6736, "task_loss": 0.38967880606651306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45135387778282166, "epoch": 5.69, "learning_rate": 9.32254558628185e-06, "loss": 0.3376, "step": 6737, "task_loss": 1.2921254634857178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2693949043750763, "epoch": 5.7, "learning_rate": 9.31650766815602e-06, "loss": 0.3635, "step": 6738, "task_loss": 0.7292662858963013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18612559139728546, "epoch": 5.7, "learning_rate": 9.31046975003019e-06, "loss": 0.2286, "step": 6739, "task_loss": 0.15474848449230194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32821980118751526, "epoch": 5.7, "learning_rate": 9.30443183190436e-06, "loss": 0.3238, "step": 6740, "task_loss": 0.7995081543922424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3511100113391876, "epoch": 5.7, "learning_rate": 9.298393913778528e-06, "loss": 0.3367, "step": 6741, "task_loss": 0.6054714322090149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2660893499851227, "epoch": 5.7, "learning_rate": 9.292355995652699e-06, "loss": 0.3457, "step": 6742, "task_loss": 0.6325841546058655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.263333261013031, "epoch": 5.7, "learning_rate": 9.28631807752687e-06, "loss": 0.3169, "step": 6743, "task_loss": 0.7300500869750977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3479125499725342, "epoch": 5.7, "learning_rate": 9.280280159401038e-06, "loss": 0.2786, "step": 6744, "task_loss": 0.45297834277153015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38561031222343445, "epoch": 5.7, "learning_rate": 9.274242241275209e-06, "loss": 0.3626, "step": 6745, "task_loss": 0.24163340032100677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2248871922492981, "epoch": 5.7, "learning_rate": 9.268204323149378e-06, "loss": 0.2609, "step": 6746, "task_loss": 0.31448712944984436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2867663502693176, "epoch": 5.7, "learning_rate": 9.262166405023548e-06, "loss": 0.403, "step": 6747, "task_loss": 0.47776028513908386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33354026079177856, "epoch": 5.7, "learning_rate": 9.256128486897717e-06, "loss": 0.3628, "step": 6748, "task_loss": 0.29264533519744873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43614253401756287, "epoch": 5.7, "learning_rate": 9.250090568771888e-06, "loss": 0.2738, "step": 6749, "task_loss": 0.5842411518096924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37547898292541504, "epoch": 5.71, "learning_rate": 9.244052650646058e-06, "loss": 0.2532, "step": 6750, "task_loss": 0.3820696771144867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21175862848758698, "epoch": 5.71, "learning_rate": 9.238014732520227e-06, "loss": 0.2968, "step": 6751, "task_loss": 0.44464758038520813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27567094564437866, "epoch": 5.71, "learning_rate": 9.231976814394396e-06, "loss": 0.266, "step": 6752, "task_loss": 1.1394580602645874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20322303473949432, "epoch": 5.71, "learning_rate": 9.225938896268567e-06, "loss": 0.32, "step": 6753, "task_loss": 0.6921201944351196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33161062002182007, "epoch": 5.71, "learning_rate": 9.219900978142737e-06, "loss": 0.315, "step": 6754, "task_loss": 1.0753761529922485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1625143140554428, "epoch": 5.71, "learning_rate": 9.213863060016908e-06, "loss": 0.3375, "step": 6755, "task_loss": 0.5148271918296814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3685924708843231, "epoch": 5.71, "learning_rate": 9.207825141891075e-06, "loss": 0.3682, "step": 6756, "task_loss": 0.11790220439434052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37439239025115967, "epoch": 5.71, "learning_rate": 9.201787223765246e-06, "loss": 0.3431, "step": 6757, "task_loss": 1.3722107410430908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4163927435874939, "epoch": 5.71, "learning_rate": 9.195749305639416e-06, "loss": 0.3336, "step": 6758, "task_loss": 1.0265878438949585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3143104612827301, "epoch": 5.71, "learning_rate": 9.189711387513587e-06, "loss": 0.3183, "step": 6759, "task_loss": 0.6918104887008667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25094202160835266, "epoch": 5.71, "learning_rate": 9.183673469387756e-06, "loss": 0.3401, "step": 6760, "task_loss": 0.42602744698524475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30630671977996826, "epoch": 5.71, "learning_rate": 9.177635551261925e-06, "loss": 0.3684, "step": 6761, "task_loss": 0.28294169902801514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5162763595581055, "epoch": 5.72, "learning_rate": 9.171597633136095e-06, "loss": 0.3682, "step": 6762, "task_loss": 0.45580869913101196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3542356491088867, "epoch": 5.72, "learning_rate": 9.165559715010266e-06, "loss": 0.3246, "step": 6763, "task_loss": 1.2597782611846924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22881430387496948, "epoch": 5.72, "learning_rate": 9.159521796884435e-06, "loss": 0.3275, "step": 6764, "task_loss": 0.7849768400192261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23471112549304962, "epoch": 5.72, "learning_rate": 9.153483878758605e-06, "loss": 0.3407, "step": 6765, "task_loss": 0.16809262335300446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3118568956851959, "epoch": 5.72, "learning_rate": 9.147445960632774e-06, "loss": 0.293, "step": 6766, "task_loss": 1.3007707595825195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24268755316734314, "epoch": 5.72, "learning_rate": 9.141408042506945e-06, "loss": 0.2914, "step": 6767, "task_loss": 0.11472328007221222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.212004154920578, "epoch": 5.72, "learning_rate": 9.135370124381113e-06, "loss": 0.3848, "step": 6768, "task_loss": 0.9550939202308655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33811530470848083, "epoch": 5.72, "learning_rate": 9.129332206255284e-06, "loss": 0.3134, "step": 6769, "task_loss": 0.694075882434845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21356087923049927, "epoch": 5.72, "learning_rate": 9.123294288129455e-06, "loss": 0.3142, "step": 6770, "task_loss": 0.06461164355278015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36978477239608765, "epoch": 5.72, "learning_rate": 9.117256370003622e-06, "loss": 0.3946, "step": 6771, "task_loss": 0.9041073322296143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.341513067483902, "epoch": 5.72, "learning_rate": 9.111218451877792e-06, "loss": 0.3279, "step": 6772, "task_loss": 0.8175075054168701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23015889525413513, "epoch": 5.72, "learning_rate": 9.105180533751963e-06, "loss": 0.2533, "step": 6773, "task_loss": 0.3907427489757538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22012093663215637, "epoch": 5.73, "learning_rate": 9.099142615626133e-06, "loss": 0.351, "step": 6774, "task_loss": 0.34262990951538086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3147250711917877, "epoch": 5.73, "learning_rate": 9.093104697500302e-06, "loss": 0.301, "step": 6775, "task_loss": 0.18282942473888397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23380976915359497, "epoch": 5.73, "learning_rate": 9.087066779374471e-06, "loss": 0.259, "step": 6776, "task_loss": 0.3257080316543579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24187630414962769, "epoch": 5.73, "learning_rate": 9.081028861248642e-06, "loss": 0.311, "step": 6777, "task_loss": 0.3637525141239166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2383742332458496, "epoch": 5.73, "learning_rate": 9.074990943122812e-06, "loss": 0.2802, "step": 6778, "task_loss": 0.32427313923835754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34686243534088135, "epoch": 5.73, "learning_rate": 9.068953024996981e-06, "loss": 0.3449, "step": 6779, "task_loss": 0.6422751545906067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31147220730781555, "epoch": 5.73, "learning_rate": 9.062915106871152e-06, "loss": 0.3619, "step": 6780, "task_loss": 0.1964792013168335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4847821593284607, "epoch": 5.73, "learning_rate": 9.05687718874532e-06, "loss": 0.415, "step": 6781, "task_loss": 1.186382532119751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23843160271644592, "epoch": 5.73, "learning_rate": 9.050839270619491e-06, "loss": 0.3544, "step": 6782, "task_loss": 0.5413521528244019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17740921676158905, "epoch": 5.73, "learning_rate": 9.04480135249366e-06, "loss": 0.3289, "step": 6783, "task_loss": 0.7219019532203674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34742459654808044, "epoch": 5.73, "learning_rate": 9.03876343436783e-06, "loss": 0.3346, "step": 6784, "task_loss": 0.4329353868961334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20417815446853638, "epoch": 5.73, "learning_rate": 9.032725516242001e-06, "loss": 0.2451, "step": 6785, "task_loss": 0.5165271162986755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35130229592323303, "epoch": 5.74, "learning_rate": 9.02668759811617e-06, "loss": 0.3433, "step": 6786, "task_loss": 0.8228366374969482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2993309497833252, "epoch": 5.74, "learning_rate": 9.020649679990339e-06, "loss": 0.3947, "step": 6787, "task_loss": 0.3163440525531769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4651809334754944, "epoch": 5.74, "learning_rate": 9.01461176186451e-06, "loss": 0.3426, "step": 6788, "task_loss": 0.5412782430648804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1480782926082611, "epoch": 5.74, "learning_rate": 9.00857384373868e-06, "loss": 0.3634, "step": 6789, "task_loss": 0.10648861527442932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.282312273979187, "epoch": 5.74, "learning_rate": 9.002535925612849e-06, "loss": 0.2936, "step": 6790, "task_loss": 0.39820596575737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39025017619132996, "epoch": 5.74, "learning_rate": 8.996498007487018e-06, "loss": 0.3504, "step": 6791, "task_loss": 1.0163171291351318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1860380470752716, "epoch": 5.74, "learning_rate": 8.990460089361189e-06, "loss": 0.4096, "step": 6792, "task_loss": 0.6283582448959351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31748756766319275, "epoch": 5.74, "learning_rate": 8.984422171235359e-06, "loss": 0.3262, "step": 6793, "task_loss": 0.3466925024986267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3524669408798218, "epoch": 5.74, "learning_rate": 8.978384253109528e-06, "loss": 0.2693, "step": 6794, "task_loss": 0.469058632850647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2234385460615158, "epoch": 5.74, "learning_rate": 8.972346334983699e-06, "loss": 0.288, "step": 6795, "task_loss": 1.2980234622955322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3119948208332062, "epoch": 5.74, "learning_rate": 8.966308416857867e-06, "loss": 0.3731, "step": 6796, "task_loss": 0.4244045317173004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.252346932888031, "epoch": 5.75, "learning_rate": 8.960270498732038e-06, "loss": 0.2795, "step": 6797, "task_loss": 0.2194671779870987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2637683153152466, "epoch": 5.75, "learning_rate": 8.954232580606207e-06, "loss": 0.3212, "step": 6798, "task_loss": 0.11982636898756027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3571907877922058, "epoch": 5.75, "learning_rate": 8.948194662480377e-06, "loss": 0.3283, "step": 6799, "task_loss": 0.1784391701221466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24307407438755035, "epoch": 5.75, "learning_rate": 8.942156744354548e-06, "loss": 0.3528, "step": 6800, "task_loss": 0.5316026210784912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6448642015457153, "epoch": 5.75, "learning_rate": 8.936118826228717e-06, "loss": 0.4073, "step": 6801, "task_loss": 1.1084541082382202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3058089017868042, "epoch": 5.75, "learning_rate": 8.930080908102886e-06, "loss": 0.2183, "step": 6802, "task_loss": 0.23170438408851624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29332706332206726, "epoch": 5.75, "learning_rate": 8.924042989977056e-06, "loss": 0.3795, "step": 6803, "task_loss": 0.40722039341926575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4489930272102356, "epoch": 5.75, "learning_rate": 8.918005071851227e-06, "loss": 0.2953, "step": 6804, "task_loss": 0.4823020100593567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3362314999103546, "epoch": 5.75, "learning_rate": 8.911967153725396e-06, "loss": 0.465, "step": 6805, "task_loss": 0.9788211584091187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25262758135795593, "epoch": 5.75, "learning_rate": 8.905929235599565e-06, "loss": 0.2971, "step": 6806, "task_loss": 0.9398433566093445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3494613468647003, "epoch": 5.75, "learning_rate": 8.899891317473735e-06, "loss": 0.3492, "step": 6807, "task_loss": 0.419023722410202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3710534870624542, "epoch": 5.75, "learning_rate": 8.893853399347906e-06, "loss": 0.3705, "step": 6808, "task_loss": 1.299156904220581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2915880084037781, "epoch": 5.76, "learning_rate": 8.887815481222075e-06, "loss": 0.3405, "step": 6809, "task_loss": 0.4700862169265747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28745555877685547, "epoch": 5.76, "learning_rate": 8.881777563096245e-06, "loss": 0.3547, "step": 6810, "task_loss": 0.05227296054363251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30972838401794434, "epoch": 5.76, "learning_rate": 8.875739644970414e-06, "loss": 0.2965, "step": 6811, "task_loss": 0.09527745097875595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2529550790786743, "epoch": 5.76, "learning_rate": 8.869701726844585e-06, "loss": 0.3014, "step": 6812, "task_loss": 0.3369404375553131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17984306812286377, "epoch": 5.76, "learning_rate": 8.863663808718754e-06, "loss": 0.3027, "step": 6813, "task_loss": 0.49976420402526855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19827823340892792, "epoch": 5.76, "learning_rate": 8.857625890592924e-06, "loss": 0.2429, "step": 6814, "task_loss": 0.6742117404937744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4027412533760071, "epoch": 5.76, "learning_rate": 8.851587972467095e-06, "loss": 0.3962, "step": 6815, "task_loss": 0.8302390575408936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30126601457595825, "epoch": 5.76, "learning_rate": 8.845550054341264e-06, "loss": 0.3529, "step": 6816, "task_loss": 0.4201963245868683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32329753041267395, "epoch": 5.76, "learning_rate": 8.839512136215432e-06, "loss": 0.4186, "step": 6817, "task_loss": 1.516167163848877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.333527147769928, "epoch": 5.76, "learning_rate": 8.833474218089603e-06, "loss": 0.3436, "step": 6818, "task_loss": 0.26498958468437195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3633437752723694, "epoch": 5.76, "learning_rate": 8.827436299963774e-06, "loss": 0.3403, "step": 6819, "task_loss": 0.9544343948364258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20849491655826569, "epoch": 5.76, "learning_rate": 8.821398381837944e-06, "loss": 0.2872, "step": 6820, "task_loss": 0.16680623590946198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.10722611844539642, "epoch": 5.77, "learning_rate": 8.815360463712111e-06, "loss": 0.252, "step": 6821, "task_loss": 0.35266876220703125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14098675549030304, "epoch": 5.77, "learning_rate": 8.809322545586282e-06, "loss": 0.305, "step": 6822, "task_loss": 0.11971619725227356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25134384632110596, "epoch": 5.77, "learning_rate": 8.803284627460452e-06, "loss": 0.4171, "step": 6823, "task_loss": 0.6320608854293823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29755938053131104, "epoch": 5.77, "learning_rate": 8.797246709334623e-06, "loss": 0.3133, "step": 6824, "task_loss": 0.07345768064260483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30664390325546265, "epoch": 5.77, "learning_rate": 8.791208791208792e-06, "loss": 0.2887, "step": 6825, "task_loss": 0.8971396684646606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2616092562675476, "epoch": 5.77, "learning_rate": 8.78517087308296e-06, "loss": 0.313, "step": 6826, "task_loss": 0.49885064363479614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3861767649650574, "epoch": 5.77, "learning_rate": 8.779132954957131e-06, "loss": 0.3836, "step": 6827, "task_loss": 0.21466924250125885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3841114640235901, "epoch": 5.77, "learning_rate": 8.773095036831302e-06, "loss": 0.3429, "step": 6828, "task_loss": 0.6433093547821045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3225157856941223, "epoch": 5.77, "learning_rate": 8.76705711870547e-06, "loss": 0.3405, "step": 6829, "task_loss": 0.13862262666225433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32170212268829346, "epoch": 5.77, "learning_rate": 8.76101920057964e-06, "loss": 0.3948, "step": 6830, "task_loss": 0.07021767646074295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26619377732276917, "epoch": 5.77, "learning_rate": 8.75498128245381e-06, "loss": 0.4156, "step": 6831, "task_loss": 0.32162463665008545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5614515542984009, "epoch": 5.77, "learning_rate": 8.74894336432798e-06, "loss": 0.338, "step": 6832, "task_loss": 0.679468035697937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26678386330604553, "epoch": 5.78, "learning_rate": 8.74290544620215e-06, "loss": 0.4145, "step": 6833, "task_loss": 1.1211166381835938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32910749316215515, "epoch": 5.78, "learning_rate": 8.73686752807632e-06, "loss": 0.2787, "step": 6834, "task_loss": 0.6537685990333557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21814312040805817, "epoch": 5.78, "learning_rate": 8.73082960995049e-06, "loss": 0.326, "step": 6835, "task_loss": 0.9861119985580444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2716771364212036, "epoch": 5.78, "learning_rate": 8.724791691824658e-06, "loss": 0.3494, "step": 6836, "task_loss": 0.1191423088312149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20543721318244934, "epoch": 5.78, "learning_rate": 8.718753773698829e-06, "loss": 0.3047, "step": 6837, "task_loss": 0.5138368010520935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2736218571662903, "epoch": 5.78, "learning_rate": 8.712715855573e-06, "loss": 0.4336, "step": 6838, "task_loss": 0.590172290802002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22049441933631897, "epoch": 5.78, "learning_rate": 8.70667793744717e-06, "loss": 0.2857, "step": 6839, "task_loss": 0.6798824667930603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4032402038574219, "epoch": 5.78, "learning_rate": 8.700640019321337e-06, "loss": 0.3468, "step": 6840, "task_loss": 0.9155799746513367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2685297429561615, "epoch": 5.78, "learning_rate": 8.694602101195508e-06, "loss": 0.3614, "step": 6841, "task_loss": 0.6793234348297119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18337826430797577, "epoch": 5.78, "learning_rate": 8.688564183069678e-06, "loss": 0.2946, "step": 6842, "task_loss": 0.22861981391906738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2675477862358093, "epoch": 5.78, "learning_rate": 8.682526264943849e-06, "loss": 0.2739, "step": 6843, "task_loss": 0.12137077748775482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42980194091796875, "epoch": 5.78, "learning_rate": 8.676488346818018e-06, "loss": 0.3703, "step": 6844, "task_loss": 1.4551500082015991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34650009870529175, "epoch": 5.79, "learning_rate": 8.670450428692186e-06, "loss": 0.2587, "step": 6845, "task_loss": 0.39244571328163147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16021814942359924, "epoch": 5.79, "learning_rate": 8.664412510566357e-06, "loss": 0.349, "step": 6846, "task_loss": 0.1344948261976242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20736634731292725, "epoch": 5.79, "learning_rate": 8.658374592440528e-06, "loss": 0.2937, "step": 6847, "task_loss": 0.06999899446964264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24630242586135864, "epoch": 5.79, "learning_rate": 8.652336674314696e-06, "loss": 0.3601, "step": 6848, "task_loss": 0.5384202599525452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2929011285305023, "epoch": 5.79, "learning_rate": 8.646298756188867e-06, "loss": 0.2833, "step": 6849, "task_loss": 1.1618715524673462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23757028579711914, "epoch": 5.79, "learning_rate": 8.640260838063036e-06, "loss": 0.2599, "step": 6850, "task_loss": 0.16063137352466583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24718737602233887, "epoch": 5.79, "learning_rate": 8.634222919937206e-06, "loss": 0.3238, "step": 6851, "task_loss": 0.18875190615653992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19931825995445251, "epoch": 5.79, "learning_rate": 8.628185001811375e-06, "loss": 0.3167, "step": 6852, "task_loss": 0.44816961884498596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32257580757141113, "epoch": 5.79, "learning_rate": 8.622147083685546e-06, "loss": 0.3674, "step": 6853, "task_loss": 0.8862510323524475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19012907147407532, "epoch": 5.79, "learning_rate": 8.616109165559716e-06, "loss": 0.3493, "step": 6854, "task_loss": 0.5570357441902161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35709720849990845, "epoch": 5.79, "learning_rate": 8.610071247433885e-06, "loss": 0.3596, "step": 6855, "task_loss": 0.8204060196876526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32011979818344116, "epoch": 5.79, "learning_rate": 8.604033329308054e-06, "loss": 0.2709, "step": 6856, "task_loss": 0.6733755469322205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4925394356250763, "epoch": 5.8, "learning_rate": 8.597995411182225e-06, "loss": 0.3136, "step": 6857, "task_loss": 0.959753692150116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2787480652332306, "epoch": 5.8, "learning_rate": 8.591957493056395e-06, "loss": 0.3657, "step": 6858, "task_loss": 0.9048730134963989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2790958285331726, "epoch": 5.8, "learning_rate": 8.585919574930564e-06, "loss": 0.2595, "step": 6859, "task_loss": 0.6930593252182007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3738304376602173, "epoch": 5.8, "learning_rate": 8.579881656804733e-06, "loss": 0.3157, "step": 6860, "task_loss": 0.8365026116371155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37123098969459534, "epoch": 5.8, "learning_rate": 8.573843738678904e-06, "loss": 0.3055, "step": 6861, "task_loss": 0.6017484068870544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39806067943573, "epoch": 5.8, "learning_rate": 8.567805820553074e-06, "loss": 0.3269, "step": 6862, "task_loss": 1.036634922027588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2979602813720703, "epoch": 5.8, "learning_rate": 8.561767902427243e-06, "loss": 0.2462, "step": 6863, "task_loss": 1.2299151420593262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28307226300239563, "epoch": 5.8, "learning_rate": 8.555729984301414e-06, "loss": 0.2979, "step": 6864, "task_loss": 0.5267876386642456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.13897141814231873, "epoch": 5.8, "learning_rate": 8.549692066175583e-06, "loss": 0.3002, "step": 6865, "task_loss": 0.008434029296040535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30509644746780396, "epoch": 5.8, "learning_rate": 8.543654148049753e-06, "loss": 0.2354, "step": 6866, "task_loss": 0.24566194415092468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16929952800273895, "epoch": 5.8, "learning_rate": 8.537616229923922e-06, "loss": 0.3328, "step": 6867, "task_loss": 0.26823195815086365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2960774898529053, "epoch": 5.81, "learning_rate": 8.531578311798093e-06, "loss": 0.3723, "step": 6868, "task_loss": 1.416679859161377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2969435751438141, "epoch": 5.81, "learning_rate": 8.525540393672263e-06, "loss": 0.3781, "step": 6869, "task_loss": 0.8822405934333801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4674602448940277, "epoch": 5.81, "learning_rate": 8.519502475546432e-06, "loss": 0.3197, "step": 6870, "task_loss": 0.7326469421386719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3449382781982422, "epoch": 5.81, "learning_rate": 8.513464557420601e-06, "loss": 0.3072, "step": 6871, "task_loss": 0.48676517605781555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2726067900657654, "epoch": 5.81, "learning_rate": 8.507426639294772e-06, "loss": 0.3322, "step": 6872, "task_loss": 1.0381094217300415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3374252915382385, "epoch": 5.81, "learning_rate": 8.501388721168942e-06, "loss": 0.3536, "step": 6873, "task_loss": 0.33802321553230286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22558525204658508, "epoch": 5.81, "learning_rate": 8.495350803043111e-06, "loss": 0.2603, "step": 6874, "task_loss": 0.29744794964790344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21319210529327393, "epoch": 5.81, "learning_rate": 8.48931288491728e-06, "loss": 0.3464, "step": 6875, "task_loss": 0.4342975318431854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46119076013565063, "epoch": 5.81, "learning_rate": 8.48327496679145e-06, "loss": 0.282, "step": 6876, "task_loss": 0.28919097781181335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2934221625328064, "epoch": 5.81, "learning_rate": 8.477237048665621e-06, "loss": 0.2596, "step": 6877, "task_loss": 0.4192080497741699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2823988199234009, "epoch": 5.81, "learning_rate": 8.47119913053979e-06, "loss": 0.3544, "step": 6878, "task_loss": 0.7040286064147949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3786158859729767, "epoch": 5.81, "learning_rate": 8.46516121241396e-06, "loss": 0.3937, "step": 6879, "task_loss": 0.9234420657157898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15815480053424835, "epoch": 5.82, "learning_rate": 8.45912329428813e-06, "loss": 0.2831, "step": 6880, "task_loss": 0.06883621215820312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28108736872673035, "epoch": 5.82, "learning_rate": 8.4530853761623e-06, "loss": 0.3659, "step": 6881, "task_loss": 0.8309874534606934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49132251739501953, "epoch": 5.82, "learning_rate": 8.447047458036469e-06, "loss": 0.3487, "step": 6882, "task_loss": 0.7970477342605591 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3891623616218567, "epoch": 5.82, "learning_rate": 8.44100953991064e-06, "loss": 0.28, "step": 6883, "task_loss": 0.5975841879844666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3872033357620239, "epoch": 5.82, "learning_rate": 8.43497162178481e-06, "loss": 0.3159, "step": 6884, "task_loss": 0.7571845054626465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22039680182933807, "epoch": 5.82, "learning_rate": 8.428933703658979e-06, "loss": 0.274, "step": 6885, "task_loss": 0.12914903461933136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33630889654159546, "epoch": 5.82, "learning_rate": 8.422895785533148e-06, "loss": 0.4075, "step": 6886, "task_loss": 0.9421035051345825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3758997917175293, "epoch": 5.82, "learning_rate": 8.416857867407318e-06, "loss": 0.3557, "step": 6887, "task_loss": 0.9861704111099243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2682221233844757, "epoch": 5.82, "learning_rate": 8.410819949281489e-06, "loss": 0.2656, "step": 6888, "task_loss": 0.35357576608657837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2722761034965515, "epoch": 5.82, "learning_rate": 8.40478203115566e-06, "loss": 0.3202, "step": 6889, "task_loss": 0.16827061772346497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4877154231071472, "epoch": 5.82, "learning_rate": 8.398744113029827e-06, "loss": 0.4149, "step": 6890, "task_loss": 0.8606184124946594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1444116234779358, "epoch": 5.82, "learning_rate": 8.392706194903997e-06, "loss": 0.2958, "step": 6891, "task_loss": 0.3494073450565338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18683257699012756, "epoch": 5.83, "learning_rate": 8.386668276778168e-06, "loss": 0.2862, "step": 6892, "task_loss": 0.016545919701457024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25547975301742554, "epoch": 5.83, "learning_rate": 8.380630358652338e-06, "loss": 0.3304, "step": 6893, "task_loss": 0.28666117787361145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1845293939113617, "epoch": 5.83, "learning_rate": 8.374592440526507e-06, "loss": 0.3607, "step": 6894, "task_loss": 1.122642993927002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16778206825256348, "epoch": 5.83, "learning_rate": 8.368554522400676e-06, "loss": 0.2472, "step": 6895, "task_loss": 0.8269186019897461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16416950523853302, "epoch": 5.83, "learning_rate": 8.362516604274847e-06, "loss": 0.3844, "step": 6896, "task_loss": 0.5367133021354675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19856344163417816, "epoch": 5.83, "learning_rate": 8.356478686149015e-06, "loss": 0.365, "step": 6897, "task_loss": 0.6465852856636047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21280086040496826, "epoch": 5.83, "learning_rate": 8.350440768023186e-06, "loss": 0.3256, "step": 6898, "task_loss": 0.570350706577301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.10447249561548233, "epoch": 5.83, "learning_rate": 8.344402849897357e-06, "loss": 0.2361, "step": 6899, "task_loss": 0.03793485462665558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4175979793071747, "epoch": 5.83, "learning_rate": 8.338364931771525e-06, "loss": 0.3417, "step": 6900, "task_loss": 1.289917230606079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22460903227329254, "epoch": 5.83, "learning_rate": 8.332327013645694e-06, "loss": 0.3473, "step": 6901, "task_loss": 0.3976024389266968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.425128310918808, "epoch": 5.83, "learning_rate": 8.326289095519865e-06, "loss": 0.3525, "step": 6902, "task_loss": 0.4138016700744629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33803921937942505, "epoch": 5.83, "learning_rate": 8.320251177394036e-06, "loss": 0.3406, "step": 6903, "task_loss": 1.0188745260238647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19412288069725037, "epoch": 5.84, "learning_rate": 8.314213259268206e-06, "loss": 0.297, "step": 6904, "task_loss": 0.7033845782279968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2686442732810974, "epoch": 5.84, "learning_rate": 8.308175341142373e-06, "loss": 0.239, "step": 6905, "task_loss": 0.20966507494449615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23062875866889954, "epoch": 5.84, "learning_rate": 8.302137423016544e-06, "loss": 0.2488, "step": 6906, "task_loss": 0.374958872795105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25915366411209106, "epoch": 5.84, "learning_rate": 8.296099504890714e-06, "loss": 0.3511, "step": 6907, "task_loss": 0.06120946630835533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3022812604904175, "epoch": 5.84, "learning_rate": 8.290061586764885e-06, "loss": 0.2776, "step": 6908, "task_loss": 0.6218221783638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38183850049972534, "epoch": 5.84, "learning_rate": 8.284023668639054e-06, "loss": 0.3267, "step": 6909, "task_loss": 0.24596989154815674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36270713806152344, "epoch": 5.84, "learning_rate": 8.277985750513223e-06, "loss": 0.3699, "step": 6910, "task_loss": 0.8507127165794373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2564859986305237, "epoch": 5.84, "learning_rate": 8.271947832387393e-06, "loss": 0.316, "step": 6911, "task_loss": 0.42980504035949707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22548967599868774, "epoch": 5.84, "learning_rate": 8.265909914261564e-06, "loss": 0.4453, "step": 6912, "task_loss": 1.2512115240097046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3679240942001343, "epoch": 5.84, "learning_rate": 8.259871996135733e-06, "loss": 0.3306, "step": 6913, "task_loss": 0.9479659795761108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3231949806213379, "epoch": 5.84, "learning_rate": 8.253834078009903e-06, "loss": 0.3035, "step": 6914, "task_loss": 0.33886441588401794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23712526261806488, "epoch": 5.84, "learning_rate": 8.247796159884072e-06, "loss": 0.2526, "step": 6915, "task_loss": 0.3149671256542206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.319580078125, "epoch": 5.85, "learning_rate": 8.241758241758243e-06, "loss": 0.2881, "step": 6916, "task_loss": 0.36493057012557983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2674461007118225, "epoch": 5.85, "learning_rate": 8.235720323632412e-06, "loss": 0.2927, "step": 6917, "task_loss": 0.16240312159061432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37601083517074585, "epoch": 5.85, "learning_rate": 8.229682405506582e-06, "loss": 0.342, "step": 6918, "task_loss": 0.5631283521652222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33603811264038086, "epoch": 5.85, "learning_rate": 8.223644487380751e-06, "loss": 0.4201, "step": 6919, "task_loss": 0.526728093624115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32842594385147095, "epoch": 5.85, "learning_rate": 8.217606569254922e-06, "loss": 0.3164, "step": 6920, "task_loss": 0.8001182675361633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42049461603164673, "epoch": 5.85, "learning_rate": 8.21156865112909e-06, "loss": 0.2995, "step": 6921, "task_loss": 0.675282895565033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29933029413223267, "epoch": 5.85, "learning_rate": 8.205530733003261e-06, "loss": 0.3305, "step": 6922, "task_loss": 0.5614340901374817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2972226142883301, "epoch": 5.85, "learning_rate": 8.199492814877432e-06, "loss": 0.2944, "step": 6923, "task_loss": 0.3985469341278076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36999019980430603, "epoch": 5.85, "learning_rate": 8.1934548967516e-06, "loss": 0.3253, "step": 6924, "task_loss": 0.40707042813301086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3107728660106659, "epoch": 5.85, "learning_rate": 8.18741697862577e-06, "loss": 0.4437, "step": 6925, "task_loss": 0.24213634431362152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2497648298740387, "epoch": 5.85, "learning_rate": 8.18137906049994e-06, "loss": 0.3159, "step": 6926, "task_loss": 0.37568017840385437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39501601457595825, "epoch": 5.85, "learning_rate": 8.17534114237411e-06, "loss": 0.2353, "step": 6927, "task_loss": 0.2249157577753067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35291868448257446, "epoch": 5.86, "learning_rate": 8.16930322424828e-06, "loss": 0.3143, "step": 6928, "task_loss": 0.5408030152320862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2316785454750061, "epoch": 5.86, "learning_rate": 8.163265306122448e-06, "loss": 0.3214, "step": 6929, "task_loss": 0.4067640006542206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2988394498825073, "epoch": 5.86, "learning_rate": 8.157227387996619e-06, "loss": 0.3785, "step": 6930, "task_loss": 0.2251962572336197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34337425231933594, "epoch": 5.86, "learning_rate": 8.15118946987079e-06, "loss": 0.3453, "step": 6931, "task_loss": 0.7029772996902466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31826817989349365, "epoch": 5.86, "learning_rate": 8.145151551744958e-06, "loss": 0.3462, "step": 6932, "task_loss": 1.207232117652893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4143719971179962, "epoch": 5.86, "learning_rate": 8.139113633619129e-06, "loss": 0.3693, "step": 6933, "task_loss": 0.24867968261241913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24876585602760315, "epoch": 5.86, "learning_rate": 8.133075715493298e-06, "loss": 0.2907, "step": 6934, "task_loss": 0.14556612074375153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.278322696685791, "epoch": 5.86, "learning_rate": 8.127037797367468e-06, "loss": 0.2466, "step": 6935, "task_loss": 0.6291283369064331 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4449074864387512, "epoch": 5.86, "learning_rate": 8.120999879241637e-06, "loss": 0.3466, "step": 6936, "task_loss": 0.4079383611679077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41736796498298645, "epoch": 5.86, "learning_rate": 8.114961961115808e-06, "loss": 0.3837, "step": 6937, "task_loss": 0.5565938353538513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35962462425231934, "epoch": 5.86, "learning_rate": 8.108924042989978e-06, "loss": 0.3083, "step": 6938, "task_loss": 0.08485987782478333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2522091865539551, "epoch": 5.87, "learning_rate": 8.102886124864147e-06, "loss": 0.2401, "step": 6939, "task_loss": 0.40754544734954834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25657913088798523, "epoch": 5.87, "learning_rate": 8.096848206738316e-06, "loss": 0.2742, "step": 6940, "task_loss": 0.19435499608516693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23798483610153198, "epoch": 5.87, "learning_rate": 8.090810288612487e-06, "loss": 0.3388, "step": 6941, "task_loss": 0.7750011682510376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33186861872673035, "epoch": 5.87, "learning_rate": 8.084772370486657e-06, "loss": 0.3622, "step": 6942, "task_loss": 0.1890406608581543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3699893057346344, "epoch": 5.87, "learning_rate": 8.078734452360826e-06, "loss": 0.4058, "step": 6943, "task_loss": 0.22767053544521332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18172264099121094, "epoch": 5.87, "learning_rate": 8.072696534234995e-06, "loss": 0.3021, "step": 6944, "task_loss": 0.7029262185096741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14594635367393494, "epoch": 5.87, "learning_rate": 8.066658616109166e-06, "loss": 0.271, "step": 6945, "task_loss": 0.831615686416626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22930222749710083, "epoch": 5.87, "learning_rate": 8.060620697983336e-06, "loss": 0.3211, "step": 6946, "task_loss": 0.4647558033466339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3028051257133484, "epoch": 5.87, "learning_rate": 8.054582779857505e-06, "loss": 0.2833, "step": 6947, "task_loss": 0.5685875415802002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5860946178436279, "epoch": 5.87, "learning_rate": 8.048544861731676e-06, "loss": 0.3578, "step": 6948, "task_loss": 0.9530767798423767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1236053854227066, "epoch": 5.87, "learning_rate": 8.042506943605845e-06, "loss": 0.2043, "step": 6949, "task_loss": 0.347981333732605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18207204341888428, "epoch": 5.87, "learning_rate": 8.036469025480015e-06, "loss": 0.2658, "step": 6950, "task_loss": 0.4545683264732361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.430461585521698, "epoch": 5.88, "learning_rate": 8.030431107354184e-06, "loss": 0.4046, "step": 6951, "task_loss": 1.353684425354004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2855958342552185, "epoch": 5.88, "learning_rate": 8.024393189228355e-06, "loss": 0.3818, "step": 6952, "task_loss": 0.2881585955619812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26874321699142456, "epoch": 5.88, "learning_rate": 8.018355271102525e-06, "loss": 0.2838, "step": 6953, "task_loss": 1.009974479675293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20960883796215057, "epoch": 5.88, "learning_rate": 8.012317352976694e-06, "loss": 0.3402, "step": 6954, "task_loss": 0.06255648285150528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36535173654556274, "epoch": 5.88, "learning_rate": 8.006279434850863e-06, "loss": 0.2932, "step": 6955, "task_loss": 0.6899385452270508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27451881766319275, "epoch": 5.88, "learning_rate": 8.000241516725033e-06, "loss": 0.3664, "step": 6956, "task_loss": 0.6030134558677673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3930412232875824, "epoch": 5.88, "learning_rate": 7.994203598599204e-06, "loss": 0.4132, "step": 6957, "task_loss": 0.6958878636360168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22965285181999207, "epoch": 5.88, "learning_rate": 7.988165680473373e-06, "loss": 0.2906, "step": 6958, "task_loss": 0.6122719645500183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32595720887184143, "epoch": 5.88, "learning_rate": 7.982127762347542e-06, "loss": 0.31, "step": 6959, "task_loss": 0.6583118438720703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27678364515304565, "epoch": 5.88, "learning_rate": 7.976089844221712e-06, "loss": 0.3892, "step": 6960, "task_loss": 1.2737817764282227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2563132047653198, "epoch": 5.88, "learning_rate": 7.970051926095883e-06, "loss": 0.2762, "step": 6961, "task_loss": 0.3915124237537384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1695837378501892, "epoch": 5.88, "learning_rate": 7.964014007970052e-06, "loss": 0.2999, "step": 6962, "task_loss": 0.709972620010376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1774826943874359, "epoch": 5.89, "learning_rate": 7.957976089844222e-06, "loss": 0.3625, "step": 6963, "task_loss": 0.3500571846961975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1799248456954956, "epoch": 5.89, "learning_rate": 7.951938171718391e-06, "loss": 0.2544, "step": 6964, "task_loss": 0.11650200188159943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2673722505569458, "epoch": 5.89, "learning_rate": 7.945900253592562e-06, "loss": 0.3906, "step": 6965, "task_loss": 1.6083725690841675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2277776449918747, "epoch": 5.89, "learning_rate": 7.93986233546673e-06, "loss": 0.2967, "step": 6966, "task_loss": 0.18719246983528137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.278236448764801, "epoch": 5.89, "learning_rate": 7.933824417340901e-06, "loss": 0.2942, "step": 6967, "task_loss": 0.6166061758995056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19050592184066772, "epoch": 5.89, "learning_rate": 7.927786499215072e-06, "loss": 0.2471, "step": 6968, "task_loss": 0.4415123164653778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3393349051475525, "epoch": 5.89, "learning_rate": 7.92174858108924e-06, "loss": 0.3252, "step": 6969, "task_loss": 1.0507609844207764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.12144578248262405, "epoch": 5.89, "learning_rate": 7.91571066296341e-06, "loss": 0.1955, "step": 6970, "task_loss": 0.07100570201873779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2992318272590637, "epoch": 5.89, "learning_rate": 7.90967274483758e-06, "loss": 0.2861, "step": 6971, "task_loss": 0.2073819935321808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37063467502593994, "epoch": 5.89, "learning_rate": 7.90363482671175e-06, "loss": 0.3507, "step": 6972, "task_loss": 1.1149733066558838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2682878375053406, "epoch": 5.89, "learning_rate": 7.897596908585921e-06, "loss": 0.28, "step": 6973, "task_loss": 0.5422324538230896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4290103018283844, "epoch": 5.89, "learning_rate": 7.891558990460088e-06, "loss": 0.42, "step": 6974, "task_loss": 1.3020131587982178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33529478311538696, "epoch": 5.9, "learning_rate": 7.885521072334259e-06, "loss": 0.3712, "step": 6975, "task_loss": 0.5425577759742737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32339656352996826, "epoch": 5.9, "learning_rate": 7.87948315420843e-06, "loss": 0.3772, "step": 6976, "task_loss": 0.4771836996078491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3611831068992615, "epoch": 5.9, "learning_rate": 7.8734452360826e-06, "loss": 0.4559, "step": 6977, "task_loss": 0.9376106858253479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24350716173648834, "epoch": 5.9, "learning_rate": 7.867407317956769e-06, "loss": 0.387, "step": 6978, "task_loss": 0.5392793416976929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17761439085006714, "epoch": 5.9, "learning_rate": 7.861369399830938e-06, "loss": 0.3831, "step": 6979, "task_loss": 0.18128900229930878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33905360102653503, "epoch": 5.9, "learning_rate": 7.855331481705108e-06, "loss": 0.3545, "step": 6980, "task_loss": 0.6859490275382996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3054667115211487, "epoch": 5.9, "learning_rate": 7.849293563579279e-06, "loss": 0.3021, "step": 6981, "task_loss": 0.45557957887649536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2357938289642334, "epoch": 5.9, "learning_rate": 7.843255645453448e-06, "loss": 0.24, "step": 6982, "task_loss": 0.5057955384254456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1637999266386032, "epoch": 5.9, "learning_rate": 7.837217727327619e-06, "loss": 0.33, "step": 6983, "task_loss": 0.21570365130901337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24297094345092773, "epoch": 5.9, "learning_rate": 7.831179809201787e-06, "loss": 0.3181, "step": 6984, "task_loss": 0.5463876128196716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2482021450996399, "epoch": 5.9, "learning_rate": 7.825141891075958e-06, "loss": 0.3423, "step": 6985, "task_loss": 0.24654807150363922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26054686307907104, "epoch": 5.9, "learning_rate": 7.819103972950127e-06, "loss": 0.2449, "step": 6986, "task_loss": 0.3455338478088379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26585090160369873, "epoch": 5.91, "learning_rate": 7.813066054824297e-06, "loss": 0.2756, "step": 6987, "task_loss": 0.2802761495113373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45111870765686035, "epoch": 5.91, "learning_rate": 7.807028136698468e-06, "loss": 0.3789, "step": 6988, "task_loss": 0.8447740077972412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.215242400765419, "epoch": 5.91, "learning_rate": 7.800990218572637e-06, "loss": 0.238, "step": 6989, "task_loss": 0.44346803426742554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4608376920223236, "epoch": 5.91, "learning_rate": 7.794952300446806e-06, "loss": 0.3286, "step": 6990, "task_loss": 1.353793740272522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3481484651565552, "epoch": 5.91, "learning_rate": 7.788914382320976e-06, "loss": 0.3517, "step": 6991, "task_loss": 0.9165235757827759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2963334619998932, "epoch": 5.91, "learning_rate": 7.782876464195147e-06, "loss": 0.3188, "step": 6992, "task_loss": 0.30601510405540466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28343629837036133, "epoch": 5.91, "learning_rate": 7.776838546069316e-06, "loss": 0.3611, "step": 6993, "task_loss": 0.44355714321136475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21617668867111206, "epoch": 5.91, "learning_rate": 7.770800627943485e-06, "loss": 0.3442, "step": 6994, "task_loss": 0.9768191576004028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31039655208587646, "epoch": 5.91, "learning_rate": 7.764762709817655e-06, "loss": 0.3487, "step": 6995, "task_loss": 0.7242206335067749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18719831109046936, "epoch": 5.91, "learning_rate": 7.758724791691826e-06, "loss": 0.2828, "step": 6996, "task_loss": 0.32302430272102356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2702338993549347, "epoch": 5.91, "learning_rate": 7.752686873565995e-06, "loss": 0.3175, "step": 6997, "task_loss": 1.0435454845428467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3298068940639496, "epoch": 5.91, "learning_rate": 7.746648955440165e-06, "loss": 0.4201, "step": 6998, "task_loss": 0.8158190846443176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38228631019592285, "epoch": 5.92, "learning_rate": 7.740611037314334e-06, "loss": 0.3514, "step": 6999, "task_loss": 0.43993479013442993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45040708780288696, "epoch": 5.92, "learning_rate": 7.734573119188505e-06, "loss": 0.331, "step": 7000, "task_loss": 0.396212637424469 }, { "epoch": 5.92, "eval_accuracy": 0.918930693069307, "eval_loss": 0.20861737430095673, "eval_runtime": 330.3406, "eval_samples_per_second": 76.436, "eval_steps_per_second": 0.599, "step": 7000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3220592141151428, "epoch": 5.92, "learning_rate": 7.728535201062674e-06, "loss": 0.3397, "step": 7001, "task_loss": 0.9784749150276184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4047633409500122, "epoch": 5.92, "learning_rate": 7.722497282936844e-06, "loss": 0.3826, "step": 7002, "task_loss": 0.4035263955593109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22544477880001068, "epoch": 5.92, "learning_rate": 7.716459364811015e-06, "loss": 0.3086, "step": 7003, "task_loss": 1.119242787361145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4081020951271057, "epoch": 5.92, "learning_rate": 7.710421446685184e-06, "loss": 0.3838, "step": 7004, "task_loss": 0.4910404086112976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1835099458694458, "epoch": 5.92, "learning_rate": 7.704383528559352e-06, "loss": 0.3759, "step": 7005, "task_loss": 0.05745773762464523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.216181218624115, "epoch": 5.92, "learning_rate": 7.698345610433523e-06, "loss": 0.3335, "step": 7006, "task_loss": 0.028080236166715622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4156988859176636, "epoch": 5.92, "learning_rate": 7.692307692307694e-06, "loss": 0.3784, "step": 7007, "task_loss": 0.930466890335083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41984647512435913, "epoch": 5.92, "learning_rate": 7.686269774181862e-06, "loss": 0.4128, "step": 7008, "task_loss": 0.8530255556106567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15693803131580353, "epoch": 5.92, "learning_rate": 7.680231856056031e-06, "loss": 0.2724, "step": 7009, "task_loss": 0.6214428544044495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30121278762817383, "epoch": 5.93, "learning_rate": 7.674193937930202e-06, "loss": 0.2503, "step": 7010, "task_loss": 0.5716965198516846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20565083622932434, "epoch": 5.93, "learning_rate": 7.668156019804372e-06, "loss": 0.2111, "step": 7011, "task_loss": 0.24817299842834473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28933271765708923, "epoch": 5.93, "learning_rate": 7.662118101678541e-06, "loss": 0.4107, "step": 7012, "task_loss": 0.6412742137908936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2882945239543915, "epoch": 5.93, "learning_rate": 7.65608018355271e-06, "loss": 0.3036, "step": 7013, "task_loss": 0.6686651706695557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25475621223449707, "epoch": 5.93, "learning_rate": 7.65004226542688e-06, "loss": 0.2951, "step": 7014, "task_loss": 0.7069004774093628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3100872039794922, "epoch": 5.93, "learning_rate": 7.644004347301051e-06, "loss": 0.3652, "step": 7015, "task_loss": 0.20687468349933624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22106938064098358, "epoch": 5.93, "learning_rate": 7.63796642917522e-06, "loss": 0.3921, "step": 7016, "task_loss": 0.8747066259384155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33665525913238525, "epoch": 5.93, "learning_rate": 7.63192851104939e-06, "loss": 0.2979, "step": 7017, "task_loss": 0.2200300097465515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4624388813972473, "epoch": 5.93, "learning_rate": 7.62589059292356e-06, "loss": 0.4657, "step": 7018, "task_loss": 0.337744802236557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18337520956993103, "epoch": 5.93, "learning_rate": 7.6198526747977294e-06, "loss": 0.3122, "step": 7019, "task_loss": 0.6683509349822998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2644696533679962, "epoch": 5.93, "learning_rate": 7.6138147566719e-06, "loss": 0.2341, "step": 7020, "task_loss": 0.3560025691986084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2799276113510132, "epoch": 5.93, "learning_rate": 7.60777683854607e-06, "loss": 0.33, "step": 7021, "task_loss": 0.2880317270755768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22880005836486816, "epoch": 5.94, "learning_rate": 7.60173892042024e-06, "loss": 0.3272, "step": 7022, "task_loss": 0.5340491533279419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2853899300098419, "epoch": 5.94, "learning_rate": 7.595701002294408e-06, "loss": 0.3228, "step": 7023, "task_loss": 0.6526864767074585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15789806842803955, "epoch": 5.94, "learning_rate": 7.589663084168579e-06, "loss": 0.2806, "step": 7024, "task_loss": 0.5087361335754395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18065860867500305, "epoch": 5.94, "learning_rate": 7.583625166042749e-06, "loss": 0.2338, "step": 7025, "task_loss": 0.3071988821029663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4041765332221985, "epoch": 5.94, "learning_rate": 7.577587247916919e-06, "loss": 0.3536, "step": 7026, "task_loss": 0.1483376920223236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4421304762363434, "epoch": 5.94, "learning_rate": 7.571549329791089e-06, "loss": 0.2854, "step": 7027, "task_loss": 1.0726537704467773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3233320713043213, "epoch": 5.94, "learning_rate": 7.565511411665258e-06, "loss": 0.4048, "step": 7028, "task_loss": 0.5001831650733948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38653841614723206, "epoch": 5.94, "learning_rate": 7.5594734935394275e-06, "loss": 0.3702, "step": 7029, "task_loss": 0.955941915512085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20533215999603271, "epoch": 5.94, "learning_rate": 7.553435575413598e-06, "loss": 0.2944, "step": 7030, "task_loss": 0.4573954641819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28944307565689087, "epoch": 5.94, "learning_rate": 7.547397657287768e-06, "loss": 0.3646, "step": 7031, "task_loss": 0.7992157340049744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3507658541202545, "epoch": 5.94, "learning_rate": 7.5413597391619375e-06, "loss": 0.3447, "step": 7032, "task_loss": 1.344385027885437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38217705488204956, "epoch": 5.94, "learning_rate": 7.535321821036106e-06, "loss": 0.4217, "step": 7033, "task_loss": 1.0719752311706543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2716076970100403, "epoch": 5.95, "learning_rate": 7.529283902910277e-06, "loss": 0.3139, "step": 7034, "task_loss": 0.5698462128639221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35239851474761963, "epoch": 5.95, "learning_rate": 7.523245984784447e-06, "loss": 0.3238, "step": 7035, "task_loss": 0.09007629007101059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24314972758293152, "epoch": 5.95, "learning_rate": 7.5172080666586164e-06, "loss": 0.3611, "step": 7036, "task_loss": 0.2710627317428589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26611220836639404, "epoch": 5.95, "learning_rate": 7.511170148532787e-06, "loss": 0.3222, "step": 7037, "task_loss": 0.44983428716659546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3555961847305298, "epoch": 5.95, "learning_rate": 7.505132230406955e-06, "loss": 0.2618, "step": 7038, "task_loss": 0.6980183124542236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2710441052913666, "epoch": 5.95, "learning_rate": 7.499094312281126e-06, "loss": 0.3323, "step": 7039, "task_loss": 1.4090818166732788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42886218428611755, "epoch": 5.95, "learning_rate": 7.493056394155295e-06, "loss": 0.4476, "step": 7040, "task_loss": 0.24105112254619598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3564131259918213, "epoch": 5.95, "learning_rate": 7.487018476029466e-06, "loss": 0.2962, "step": 7041, "task_loss": 0.0506402850151062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.319448322057724, "epoch": 5.95, "learning_rate": 7.480980557903636e-06, "loss": 0.2683, "step": 7042, "task_loss": 0.3236501216888428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3485803008079529, "epoch": 5.95, "learning_rate": 7.4749426397778045e-06, "loss": 0.3309, "step": 7043, "task_loss": 1.0515892505645752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23109419643878937, "epoch": 5.95, "learning_rate": 7.468904721651974e-06, "loss": 0.3519, "step": 7044, "task_loss": 0.6811333298683167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23878435790538788, "epoch": 5.95, "learning_rate": 7.462866803526145e-06, "loss": 0.2956, "step": 7045, "task_loss": 0.6487923860549927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23210497200489044, "epoch": 5.96, "learning_rate": 7.4568288854003145e-06, "loss": 0.2681, "step": 7046, "task_loss": 0.0768669843673706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3468450903892517, "epoch": 5.96, "learning_rate": 7.450790967274484e-06, "loss": 0.3762, "step": 7047, "task_loss": 0.6893743276596069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2755965292453766, "epoch": 5.96, "learning_rate": 7.444753049148653e-06, "loss": 0.271, "step": 7048, "task_loss": 0.897481381893158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3448454737663269, "epoch": 5.96, "learning_rate": 7.438715131022824e-06, "loss": 0.35, "step": 7049, "task_loss": 0.6836541891098022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3367498815059662, "epoch": 5.96, "learning_rate": 7.432677212896993e-06, "loss": 0.2981, "step": 7050, "task_loss": 1.2441320419311523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26446133852005005, "epoch": 5.96, "learning_rate": 7.426639294771163e-06, "loss": 0.2893, "step": 7051, "task_loss": 0.3754515051841736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24491849541664124, "epoch": 5.96, "learning_rate": 7.420601376645334e-06, "loss": 0.2879, "step": 7052, "task_loss": 1.1938608884811401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39240458607673645, "epoch": 5.96, "learning_rate": 7.414563458519503e-06, "loss": 0.3182, "step": 7053, "task_loss": 0.6215736865997314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33023643493652344, "epoch": 5.96, "learning_rate": 7.408525540393672e-06, "loss": 0.2934, "step": 7054, "task_loss": 1.3592265844345093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2677484452724457, "epoch": 5.96, "learning_rate": 7.402487622267842e-06, "loss": 0.4094, "step": 7055, "task_loss": 0.5984075665473938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20390357077121735, "epoch": 5.96, "learning_rate": 7.396449704142013e-06, "loss": 0.3043, "step": 7056, "task_loss": 0.17017978429794312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22048363089561462, "epoch": 5.96, "learning_rate": 7.390411786016182e-06, "loss": 0.343, "step": 7057, "task_loss": 0.5967419147491455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.253814160823822, "epoch": 5.97, "learning_rate": 7.384373867890351e-06, "loss": 0.307, "step": 7058, "task_loss": 0.4622407853603363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2610442638397217, "epoch": 5.97, "learning_rate": 7.378335949764521e-06, "loss": 0.3246, "step": 7059, "task_loss": 0.6716785430908203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28579041361808777, "epoch": 5.97, "learning_rate": 7.3722980316386915e-06, "loss": 0.3428, "step": 7060, "task_loss": 0.6908806562423706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40054479241371155, "epoch": 5.97, "learning_rate": 7.366260113512861e-06, "loss": 0.4092, "step": 7061, "task_loss": 0.6688803434371948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3657313883304596, "epoch": 5.97, "learning_rate": 7.360222195387032e-06, "loss": 0.3679, "step": 7062, "task_loss": 0.39293015003204346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21227183938026428, "epoch": 5.97, "learning_rate": 7.3541842772612e-06, "loss": 0.2487, "step": 7063, "task_loss": 0.18881265819072723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2646484375, "epoch": 5.97, "learning_rate": 7.34814635913537e-06, "loss": 0.256, "step": 7064, "task_loss": 0.3717053234577179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23863577842712402, "epoch": 5.97, "learning_rate": 7.34210844100954e-06, "loss": 0.3271, "step": 7065, "task_loss": 0.7388043403625488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22377218306064606, "epoch": 5.97, "learning_rate": 7.336070522883711e-06, "loss": 0.3252, "step": 7066, "task_loss": 0.10952365398406982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46604156494140625, "epoch": 5.97, "learning_rate": 7.33003260475788e-06, "loss": 0.3718, "step": 7067, "task_loss": 0.7768674492835999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2845110297203064, "epoch": 5.97, "learning_rate": 7.323994686632049e-06, "loss": 0.2421, "step": 7068, "task_loss": 0.5547423958778381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2534538209438324, "epoch": 5.97, "learning_rate": 7.317956768506219e-06, "loss": 0.336, "step": 7069, "task_loss": 0.9673245549201965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1756129413843155, "epoch": 5.98, "learning_rate": 7.31191885038039e-06, "loss": 0.3131, "step": 7070, "task_loss": 0.3760894238948822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47409743070602417, "epoch": 5.98, "learning_rate": 7.305880932254559e-06, "loss": 0.3615, "step": 7071, "task_loss": 0.8932098746299744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1738022118806839, "epoch": 5.98, "learning_rate": 7.299843014128729e-06, "loss": 0.3507, "step": 7072, "task_loss": 0.6860185861587524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23855632543563843, "epoch": 5.98, "learning_rate": 7.293805096002898e-06, "loss": 0.3389, "step": 7073, "task_loss": 0.4259927272796631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17362700402736664, "epoch": 5.98, "learning_rate": 7.2877671778770685e-06, "loss": 0.2711, "step": 7074, "task_loss": 0.1952124685049057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23443840444087982, "epoch": 5.98, "learning_rate": 7.281729259751238e-06, "loss": 0.4551, "step": 7075, "task_loss": 1.1715424060821533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2646521031856537, "epoch": 5.98, "learning_rate": 7.275691341625408e-06, "loss": 0.2701, "step": 7076, "task_loss": 0.2802142798900604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1517234444618225, "epoch": 5.98, "learning_rate": 7.2696534234995785e-06, "loss": 0.2701, "step": 7077, "task_loss": 0.3428773880004883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18542514741420746, "epoch": 5.98, "learning_rate": 7.263615505373747e-06, "loss": 0.2579, "step": 7078, "task_loss": 0.202164426445961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21772143244743347, "epoch": 5.98, "learning_rate": 7.257577587247917e-06, "loss": 0.2868, "step": 7079, "task_loss": 0.7855547070503235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22590868175029755, "epoch": 5.98, "learning_rate": 7.251539669122087e-06, "loss": 0.3065, "step": 7080, "task_loss": 0.5438039302825928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.367354154586792, "epoch": 5.99, "learning_rate": 7.245501750996257e-06, "loss": 0.2735, "step": 7081, "task_loss": 0.47704067826271057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20535868406295776, "epoch": 5.99, "learning_rate": 7.239463832870427e-06, "loss": 0.2175, "step": 7082, "task_loss": 0.7430224418640137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35776233673095703, "epoch": 5.99, "learning_rate": 7.233425914744596e-06, "loss": 0.3812, "step": 7083, "task_loss": 0.6628845930099487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5561478137969971, "epoch": 5.99, "learning_rate": 7.227387996618766e-06, "loss": 0.426, "step": 7084, "task_loss": 0.6556566953659058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16441130638122559, "epoch": 5.99, "learning_rate": 7.221350078492936e-06, "loss": 0.265, "step": 7085, "task_loss": 0.9349076151847839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20114029943943024, "epoch": 5.99, "learning_rate": 7.215312160367106e-06, "loss": 0.2188, "step": 7086, "task_loss": 0.5351748466491699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.265122652053833, "epoch": 5.99, "learning_rate": 7.209274242241277e-06, "loss": 0.2329, "step": 7087, "task_loss": 0.7013180255889893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1496926248073578, "epoch": 5.99, "learning_rate": 7.203236324115445e-06, "loss": 0.2477, "step": 7088, "task_loss": 0.30489033460617065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5601909160614014, "epoch": 5.99, "learning_rate": 7.197198405989615e-06, "loss": 0.4555, "step": 7089, "task_loss": 1.524379014968872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38772255182266235, "epoch": 5.99, "learning_rate": 7.191160487863785e-06, "loss": 0.2903, "step": 7090, "task_loss": 0.6796432733535767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34532272815704346, "epoch": 5.99, "learning_rate": 7.1851225697379555e-06, "loss": 0.322, "step": 7091, "task_loss": 0.8394181132316589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3112182021141052, "epoch": 5.99, "learning_rate": 7.1790846516121235e-06, "loss": 0.3778, "step": 7092, "task_loss": 0.9844138622283936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38141685724258423, "epoch": 6.0, "learning_rate": 7.173046733486294e-06, "loss": 0.3284, "step": 7093, "task_loss": 0.23082810640335083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5887683033943176, "epoch": 6.0, "learning_rate": 7.167008815360464e-06, "loss": 0.4354, "step": 7094, "task_loss": 0.9659625291824341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23914942145347595, "epoch": 6.0, "learning_rate": 7.160970897234634e-06, "loss": 0.3747, "step": 7095, "task_loss": 0.1158970445394516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2294548600912094, "epoch": 6.0, "learning_rate": 7.154932979108804e-06, "loss": 0.3073, "step": 7096, "task_loss": 0.7556275725364685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28676337003707886, "epoch": 6.0, "learning_rate": 7.148895060982973e-06, "loss": 0.3913, "step": 7097, "task_loss": 0.9594449996948242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38256576657295227, "epoch": 6.0, "learning_rate": 7.142857142857143e-06, "loss": 0.3495, "step": 7098, "task_loss": 1.0141462087631226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3972775936126709, "epoch": 6.0, "learning_rate": 7.136819224731313e-06, "loss": 0.668, "step": 7099, "task_loss": 0.32584959268569946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3276614546775818, "epoch": 6.0, "learning_rate": 7.130781306605483e-06, "loss": 0.3021, "step": 7100, "task_loss": 0.5535512566566467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24407625198364258, "epoch": 6.0, "learning_rate": 7.124743388479653e-06, "loss": 0.372, "step": 7101, "task_loss": 0.531032919883728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28047749400138855, "epoch": 6.0, "learning_rate": 7.118705470353822e-06, "loss": 0.4299, "step": 7102, "task_loss": 0.4140965938568115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2046005129814148, "epoch": 6.0, "learning_rate": 7.112667552227991e-06, "loss": 0.3267, "step": 7103, "task_loss": 0.5855927467346191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30715876817703247, "epoch": 6.01, "learning_rate": 7.106629634102162e-06, "loss": 0.345, "step": 7104, "task_loss": 0.6304429173469543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.213723286986351, "epoch": 6.01, "learning_rate": 7.100591715976332e-06, "loss": 0.3477, "step": 7105, "task_loss": 0.2878100574016571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3984605371952057, "epoch": 6.01, "learning_rate": 7.094553797850502e-06, "loss": 0.2922, "step": 7106, "task_loss": 0.3169901967048645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5915418863296509, "epoch": 6.01, "learning_rate": 7.08851587972467e-06, "loss": 0.401, "step": 7107, "task_loss": 0.602306067943573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35499608516693115, "epoch": 6.01, "learning_rate": 7.082477961598841e-06, "loss": 0.3644, "step": 7108, "task_loss": 0.6903975605964661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27154242992401123, "epoch": 6.01, "learning_rate": 7.0764400434730105e-06, "loss": 0.3587, "step": 7109, "task_loss": 1.5395166873931885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44963139295578003, "epoch": 6.01, "learning_rate": 7.070402125347181e-06, "loss": 0.3291, "step": 7110, "task_loss": 0.19854889810085297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28041696548461914, "epoch": 6.01, "learning_rate": 7.064364207221351e-06, "loss": 0.4308, "step": 7111, "task_loss": 0.4215468168258667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23624666035175323, "epoch": 6.01, "learning_rate": 7.05832628909552e-06, "loss": 0.2915, "step": 7112, "task_loss": 0.09181546419858932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4255000352859497, "epoch": 6.01, "learning_rate": 7.0522883709696894e-06, "loss": 0.3708, "step": 7113, "task_loss": 0.9513877034187317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2279524952173233, "epoch": 6.01, "learning_rate": 7.04625045284386e-06, "loss": 0.3175, "step": 7114, "task_loss": 0.23696641623973846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1329088807106018, "epoch": 6.01, "learning_rate": 7.04021253471803e-06, "loss": 0.209, "step": 7115, "task_loss": 0.03138301149010658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4122864603996277, "epoch": 6.02, "learning_rate": 7.0341746165921994e-06, "loss": 0.3428, "step": 7116, "task_loss": 0.23326323926448822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36483561992645264, "epoch": 6.02, "learning_rate": 7.028136698466368e-06, "loss": 0.2708, "step": 7117, "task_loss": 0.5643088221549988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22746676206588745, "epoch": 6.02, "learning_rate": 7.022098780340539e-06, "loss": 0.2895, "step": 7118, "task_loss": 0.12814947962760925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19026751816272736, "epoch": 6.02, "learning_rate": 7.016060862214709e-06, "loss": 0.2793, "step": 7119, "task_loss": 0.5272297859191895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3343832790851593, "epoch": 6.02, "learning_rate": 7.010022944088878e-06, "loss": 0.3301, "step": 7120, "task_loss": 0.7080164551734924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6512521505355835, "epoch": 6.02, "learning_rate": 7.003985025963049e-06, "loss": 0.388, "step": 7121, "task_loss": 0.7708722949028015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4953049421310425, "epoch": 6.02, "learning_rate": 6.997947107837218e-06, "loss": 0.2983, "step": 7122, "task_loss": 0.7466191649436951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27909737825393677, "epoch": 6.02, "learning_rate": 6.9919091897113875e-06, "loss": 0.3555, "step": 7123, "task_loss": 0.5713152289390564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3074741065502167, "epoch": 6.02, "learning_rate": 6.985871271585557e-06, "loss": 0.3851, "step": 7124, "task_loss": 0.6611383557319641 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33839938044548035, "epoch": 6.02, "learning_rate": 6.979833353459728e-06, "loss": 0.3247, "step": 7125, "task_loss": 0.6042219400405884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22431842982769012, "epoch": 6.02, "learning_rate": 6.9737954353338975e-06, "loss": 0.3317, "step": 7126, "task_loss": 0.8374233841896057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30115875601768494, "epoch": 6.02, "learning_rate": 6.967757517208066e-06, "loss": 0.2648, "step": 7127, "task_loss": 0.42615067958831787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22820454835891724, "epoch": 6.03, "learning_rate": 6.961719599082236e-06, "loss": 0.2984, "step": 7128, "task_loss": 0.3396134078502655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2508254051208496, "epoch": 6.03, "learning_rate": 6.955681680956407e-06, "loss": 0.3803, "step": 7129, "task_loss": 0.6327781677246094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16717056930065155, "epoch": 6.03, "learning_rate": 6.9496437628305764e-06, "loss": 0.33, "step": 7130, "task_loss": 0.40849506855010986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1886851191520691, "epoch": 6.03, "learning_rate": 6.943605844704747e-06, "loss": 0.3907, "step": 7131, "task_loss": 0.10970267653465271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2834419012069702, "epoch": 6.03, "learning_rate": 6.937567926578915e-06, "loss": 0.2355, "step": 7132, "task_loss": 0.3404216170310974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2203027755022049, "epoch": 6.03, "learning_rate": 6.931530008453086e-06, "loss": 0.3413, "step": 7133, "task_loss": 0.5990549921989441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3003913462162018, "epoch": 6.03, "learning_rate": 6.925492090327255e-06, "loss": 0.3191, "step": 7134, "task_loss": 0.69185471534729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27977728843688965, "epoch": 6.03, "learning_rate": 6.919454172201426e-06, "loss": 0.2463, "step": 7135, "task_loss": 0.4223666191101074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19681304693222046, "epoch": 6.03, "learning_rate": 6.913416254075596e-06, "loss": 0.2787, "step": 7136, "task_loss": 0.44191691279411316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18940195441246033, "epoch": 6.03, "learning_rate": 6.9073783359497645e-06, "loss": 0.3157, "step": 7137, "task_loss": 0.6826296448707581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36215487122535706, "epoch": 6.03, "learning_rate": 6.901340417823934e-06, "loss": 0.2955, "step": 7138, "task_loss": 0.487962931394577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22416061162948608, "epoch": 6.03, "learning_rate": 6.895302499698105e-06, "loss": 0.2328, "step": 7139, "task_loss": 0.430345356464386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22846734523773193, "epoch": 6.04, "learning_rate": 6.8892645815722745e-06, "loss": 0.3134, "step": 7140, "task_loss": 0.44915032386779785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2649359703063965, "epoch": 6.04, "learning_rate": 6.883226663446444e-06, "loss": 0.2951, "step": 7141, "task_loss": 0.2308601588010788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38197997212409973, "epoch": 6.04, "learning_rate": 6.877188745320613e-06, "loss": 0.3179, "step": 7142, "task_loss": 0.28131604194641113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3300563395023346, "epoch": 6.04, "learning_rate": 6.871150827194784e-06, "loss": 0.2898, "step": 7143, "task_loss": 0.3239456117153168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3056347370147705, "epoch": 6.04, "learning_rate": 6.865112909068953e-06, "loss": 0.3333, "step": 7144, "task_loss": 0.5786159634590149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30238574743270874, "epoch": 6.04, "learning_rate": 6.859074990943123e-06, "loss": 0.347, "step": 7145, "task_loss": 0.8814088702201843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25698667764663696, "epoch": 6.04, "learning_rate": 6.853037072817294e-06, "loss": 0.3019, "step": 7146, "task_loss": 0.7271972298622131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2597278356552124, "epoch": 6.04, "learning_rate": 6.846999154691463e-06, "loss": 0.2992, "step": 7147, "task_loss": 0.7048519849777222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27971118688583374, "epoch": 6.04, "learning_rate": 6.840961236565632e-06, "loss": 0.2926, "step": 7148, "task_loss": 0.3427642285823822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23149842023849487, "epoch": 6.04, "learning_rate": 6.834923318439802e-06, "loss": 0.3011, "step": 7149, "task_loss": 0.5466868877410889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2368066906929016, "epoch": 6.04, "learning_rate": 6.828885400313973e-06, "loss": 0.3769, "step": 7150, "task_loss": 0.532660961151123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23274487257003784, "epoch": 6.04, "learning_rate": 6.822847482188142e-06, "loss": 0.2733, "step": 7151, "task_loss": 0.1897253841161728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35416877269744873, "epoch": 6.05, "learning_rate": 6.816809564062311e-06, "loss": 0.3479, "step": 7152, "task_loss": 0.9005858302116394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14932379126548767, "epoch": 6.05, "learning_rate": 6.810771645936481e-06, "loss": 0.263, "step": 7153, "task_loss": 0.0716807171702385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2508079409599304, "epoch": 6.05, "learning_rate": 6.8047337278106515e-06, "loss": 0.3574, "step": 7154, "task_loss": 0.09921501576900482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.163803368806839, "epoch": 6.05, "learning_rate": 6.798695809684821e-06, "loss": 0.3256, "step": 7155, "task_loss": 0.07661934942007065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37021902203559875, "epoch": 6.05, "learning_rate": 6.792657891558992e-06, "loss": 0.3838, "step": 7156, "task_loss": 0.7977172136306763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1819590926170349, "epoch": 6.05, "learning_rate": 6.78661997343316e-06, "loss": 0.3572, "step": 7157, "task_loss": 0.43205538392066956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18558314442634583, "epoch": 6.05, "learning_rate": 6.78058205530733e-06, "loss": 0.2588, "step": 7158, "task_loss": 0.3668860197067261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22251251339912415, "epoch": 6.05, "learning_rate": 6.7745441371815e-06, "loss": 0.2824, "step": 7159, "task_loss": 0.3818557560443878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19423216581344604, "epoch": 6.05, "learning_rate": 6.768506219055671e-06, "loss": 0.3405, "step": 7160, "task_loss": 0.13166777789592743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41115647554397583, "epoch": 6.05, "learning_rate": 6.76246830092984e-06, "loss": 0.4487, "step": 7161, "task_loss": 0.9518448710441589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27435919642448425, "epoch": 6.05, "learning_rate": 6.756430382804009e-06, "loss": 0.3016, "step": 7162, "task_loss": 0.20483911037445068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2974226772785187, "epoch": 6.05, "learning_rate": 6.750392464678179e-06, "loss": 0.2502, "step": 7163, "task_loss": 0.30759796500205994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24072055518627167, "epoch": 6.06, "learning_rate": 6.744354546552349e-06, "loss": 0.3885, "step": 7164, "task_loss": 0.3312627673149109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2849824130535126, "epoch": 6.06, "learning_rate": 6.738316628426519e-06, "loss": 0.344, "step": 7165, "task_loss": 0.5747207403182983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18936830759048462, "epoch": 6.06, "learning_rate": 6.732278710300689e-06, "loss": 0.2069, "step": 7166, "task_loss": 0.046619050204753876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23063796758651733, "epoch": 6.06, "learning_rate": 6.726240792174858e-06, "loss": 0.4123, "step": 7167, "task_loss": 0.03430229797959328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.238184854388237, "epoch": 6.06, "learning_rate": 6.720202874049028e-06, "loss": 0.3225, "step": 7168, "task_loss": 0.08372077345848083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18656274676322937, "epoch": 6.06, "learning_rate": 6.714164955923198e-06, "loss": 0.2871, "step": 7169, "task_loss": 0.2512434124946594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28237754106521606, "epoch": 6.06, "learning_rate": 6.708127037797368e-06, "loss": 0.2962, "step": 7170, "task_loss": 0.31702524423599243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3769277036190033, "epoch": 6.06, "learning_rate": 6.7020891196715385e-06, "loss": 0.3542, "step": 7171, "task_loss": 0.7989686131477356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29815760254859924, "epoch": 6.06, "learning_rate": 6.6960512015457065e-06, "loss": 0.297, "step": 7172, "task_loss": 0.07172943651676178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33515989780426025, "epoch": 6.06, "learning_rate": 6.690013283419877e-06, "loss": 0.4102, "step": 7173, "task_loss": 1.2507691383361816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1887315958738327, "epoch": 6.06, "learning_rate": 6.683975365294047e-06, "loss": 0.4715, "step": 7174, "task_loss": 0.6895838975906372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2909701466560364, "epoch": 6.07, "learning_rate": 6.677937447168217e-06, "loss": 0.302, "step": 7175, "task_loss": 0.5165374279022217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30206435918807983, "epoch": 6.07, "learning_rate": 6.671899529042387e-06, "loss": 0.3309, "step": 7176, "task_loss": 0.14548633992671967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19664327800273895, "epoch": 6.07, "learning_rate": 6.665861610916556e-06, "loss": 0.3629, "step": 7177, "task_loss": 0.051027510315179825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3996765613555908, "epoch": 6.07, "learning_rate": 6.659823692790726e-06, "loss": 0.3885, "step": 7178, "task_loss": 0.5588759779930115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1844104826450348, "epoch": 6.07, "learning_rate": 6.653785774664896e-06, "loss": 0.2483, "step": 7179, "task_loss": 0.8794468641281128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3519577980041504, "epoch": 6.07, "learning_rate": 6.647747856539066e-06, "loss": 0.3411, "step": 7180, "task_loss": 1.055120587348938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27597710490226746, "epoch": 6.07, "learning_rate": 6.641709938413235e-06, "loss": 0.2955, "step": 7181, "task_loss": 0.6513684391975403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4734048843383789, "epoch": 6.07, "learning_rate": 6.635672020287405e-06, "loss": 0.2887, "step": 7182, "task_loss": 0.7095674872398376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19497928023338318, "epoch": 6.07, "learning_rate": 6.629634102161575e-06, "loss": 0.2801, "step": 7183, "task_loss": 0.09174716472625732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2718554735183716, "epoch": 6.07, "learning_rate": 6.623596184035745e-06, "loss": 0.3977, "step": 7184, "task_loss": 0.06672835350036621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24175794422626495, "epoch": 6.07, "learning_rate": 6.617558265909915e-06, "loss": 0.3016, "step": 7185, "task_loss": 0.6951236128807068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31283700466156006, "epoch": 6.07, "learning_rate": 6.6115203477840835e-06, "loss": 0.2513, "step": 7186, "task_loss": 0.4200376570224762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18315543234348297, "epoch": 6.08, "learning_rate": 6.605482429658254e-06, "loss": 0.3259, "step": 7187, "task_loss": 0.2660863697528839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47846701741218567, "epoch": 6.08, "learning_rate": 6.599444511532424e-06, "loss": 0.3696, "step": 7188, "task_loss": 1.2596654891967773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3312249779701233, "epoch": 6.08, "learning_rate": 6.5934065934065935e-06, "loss": 0.3609, "step": 7189, "task_loss": 0.9982279539108276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4801411032676697, "epoch": 6.08, "learning_rate": 6.587368675280764e-06, "loss": 0.374, "step": 7190, "task_loss": 1.4956051111221313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29880374670028687, "epoch": 6.08, "learning_rate": 6.581330757154933e-06, "loss": 0.2993, "step": 7191, "task_loss": 0.7626286745071411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24110616743564606, "epoch": 6.08, "learning_rate": 6.575292839029103e-06, "loss": 0.3043, "step": 7192, "task_loss": 0.5680070519447327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3007383644580841, "epoch": 6.08, "learning_rate": 6.5692549209032724e-06, "loss": 0.3039, "step": 7193, "task_loss": 0.8799261450767517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26643499732017517, "epoch": 6.08, "learning_rate": 6.563217002777443e-06, "loss": 0.2879, "step": 7194, "task_loss": 0.25840914249420166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.256481409072876, "epoch": 6.08, "learning_rate": 6.557179084651613e-06, "loss": 0.2336, "step": 7195, "task_loss": 0.18747185170650482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2001219093799591, "epoch": 6.08, "learning_rate": 6.551141166525782e-06, "loss": 0.3126, "step": 7196, "task_loss": 0.9220877885818481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23870672285556793, "epoch": 6.08, "learning_rate": 6.545103248399951e-06, "loss": 0.2807, "step": 7197, "task_loss": 0.3806220293045044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18295499682426453, "epoch": 6.08, "learning_rate": 6.539065330274122e-06, "loss": 0.2126, "step": 7198, "task_loss": 0.31863832473754883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27567046880722046, "epoch": 6.09, "learning_rate": 6.533027412148292e-06, "loss": 0.4145, "step": 7199, "task_loss": 0.8323246240615845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28402599692344666, "epoch": 6.09, "learning_rate": 6.526989494022462e-06, "loss": 0.3298, "step": 7200, "task_loss": 0.5403230786323547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.290897399187088, "epoch": 6.09, "learning_rate": 6.52095157589663e-06, "loss": 0.4074, "step": 7201, "task_loss": 0.09280747175216675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2698715031147003, "epoch": 6.09, "learning_rate": 6.514913657770801e-06, "loss": 0.2518, "step": 7202, "task_loss": 0.16785882413387299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2477683424949646, "epoch": 6.09, "learning_rate": 6.5088757396449705e-06, "loss": 0.3004, "step": 7203, "task_loss": 0.8397983908653259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1478601098060608, "epoch": 6.09, "learning_rate": 6.502837821519141e-06, "loss": 0.3136, "step": 7204, "task_loss": 0.7782301306724548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28757235407829285, "epoch": 6.09, "learning_rate": 6.496799903393311e-06, "loss": 0.3867, "step": 7205, "task_loss": 0.3357979953289032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3988846242427826, "epoch": 6.09, "learning_rate": 6.49076198526748e-06, "loss": 0.4054, "step": 7206, "task_loss": 0.9789754152297974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38404691219329834, "epoch": 6.09, "learning_rate": 6.484724067141649e-06, "loss": 0.3568, "step": 7207, "task_loss": 0.10531356930732727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.13197866082191467, "epoch": 6.09, "learning_rate": 6.47868614901582e-06, "loss": 0.3345, "step": 7208, "task_loss": 0.7554012537002563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24499285221099854, "epoch": 6.09, "learning_rate": 6.47264823088999e-06, "loss": 0.2815, "step": 7209, "task_loss": 0.15364906191825867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2072940468788147, "epoch": 6.09, "learning_rate": 6.4666103127641594e-06, "loss": 0.2266, "step": 7210, "task_loss": 0.5497735142707825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3333819508552551, "epoch": 6.1, "learning_rate": 6.460572394638328e-06, "loss": 0.3256, "step": 7211, "task_loss": 0.6873918175697327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.12099142372608185, "epoch": 6.1, "learning_rate": 6.454534476512499e-06, "loss": 0.2934, "step": 7212, "task_loss": 0.059382364153862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2851664125919342, "epoch": 6.1, "learning_rate": 6.448496558386669e-06, "loss": 0.3026, "step": 7213, "task_loss": 0.35164231061935425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4070589542388916, "epoch": 6.1, "learning_rate": 6.442458640260838e-06, "loss": 0.333, "step": 7214, "task_loss": 0.24642838537693024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2290160357952118, "epoch": 6.1, "learning_rate": 6.436420722135009e-06, "loss": 0.2608, "step": 7215, "task_loss": 0.3111751675605774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.365635484457016, "epoch": 6.1, "learning_rate": 6.430382804009178e-06, "loss": 0.3736, "step": 7216, "task_loss": 0.47724881768226624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48462533950805664, "epoch": 6.1, "learning_rate": 6.4243448858833475e-06, "loss": 0.3663, "step": 7217, "task_loss": 0.7283416986465454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37700068950653076, "epoch": 6.1, "learning_rate": 6.418306967757517e-06, "loss": 0.3302, "step": 7218, "task_loss": 0.37514257431030273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17565947771072388, "epoch": 6.1, "learning_rate": 6.412269049631688e-06, "loss": 0.3604, "step": 7219, "task_loss": 0.9128443002700806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33093345165252686, "epoch": 6.1, "learning_rate": 6.4062311315058575e-06, "loss": 0.3549, "step": 7220, "task_loss": 0.9067699909210205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21238841116428375, "epoch": 6.1, "learning_rate": 6.400193213380026e-06, "loss": 0.2607, "step": 7221, "task_loss": 0.4835890531539917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2049015462398529, "epoch": 6.1, "learning_rate": 6.394155295254196e-06, "loss": 0.2546, "step": 7222, "task_loss": 0.06758490204811096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41271892189979553, "epoch": 6.11, "learning_rate": 6.388117377128367e-06, "loss": 0.3744, "step": 7223, "task_loss": 0.7066802978515625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31275904178619385, "epoch": 6.11, "learning_rate": 6.382079459002536e-06, "loss": 0.3528, "step": 7224, "task_loss": 0.8539870977401733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28326818346977234, "epoch": 6.11, "learning_rate": 6.376041540876707e-06, "loss": 0.2485, "step": 7225, "task_loss": 0.8932639360427856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2985093593597412, "epoch": 6.11, "learning_rate": 6.370003622750875e-06, "loss": 0.4064, "step": 7226, "task_loss": 0.07319838553667068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2644343078136444, "epoch": 6.11, "learning_rate": 6.363965704625046e-06, "loss": 0.2945, "step": 7227, "task_loss": 0.37754836678504944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30890244245529175, "epoch": 6.11, "learning_rate": 6.357927786499215e-06, "loss": 0.2562, "step": 7228, "task_loss": 0.561816930770874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5732179880142212, "epoch": 6.11, "learning_rate": 6.351889868373385e-06, "loss": 0.4089, "step": 7229, "task_loss": 0.493012011051178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.12824293971061707, "epoch": 6.11, "learning_rate": 6.345851950247556e-06, "loss": 0.244, "step": 7230, "task_loss": 0.14005261659622192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37110328674316406, "epoch": 6.11, "learning_rate": 6.3398140321217245e-06, "loss": 0.3043, "step": 7231, "task_loss": 0.365360289812088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2724701166152954, "epoch": 6.11, "learning_rate": 6.333776113995894e-06, "loss": 0.3122, "step": 7232, "task_loss": 0.10749351233243942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22556425631046295, "epoch": 6.11, "learning_rate": 6.327738195870064e-06, "loss": 0.2817, "step": 7233, "task_loss": 0.5341182947158813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26129332184791565, "epoch": 6.11, "learning_rate": 6.3217002777442345e-06, "loss": 0.3171, "step": 7234, "task_loss": 0.3188205361366272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3964064121246338, "epoch": 6.12, "learning_rate": 6.315662359618404e-06, "loss": 0.3661, "step": 7235, "task_loss": 0.11545424908399582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3265972137451172, "epoch": 6.12, "learning_rate": 6.309624441492573e-06, "loss": 0.2771, "step": 7236, "task_loss": 0.5769368410110474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.554074764251709, "epoch": 6.12, "learning_rate": 6.303586523366743e-06, "loss": 0.3627, "step": 7237, "task_loss": 0.8368777632713318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18700280785560608, "epoch": 6.12, "learning_rate": 6.297548605240913e-06, "loss": 0.2026, "step": 7238, "task_loss": 0.11727967113256454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3294069766998291, "epoch": 6.12, "learning_rate": 6.291510687115083e-06, "loss": 0.3127, "step": 7239, "task_loss": 1.222719430923462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2009299099445343, "epoch": 6.12, "learning_rate": 6.285472768989254e-06, "loss": 0.3048, "step": 7240, "task_loss": 0.11747930198907852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3133801817893982, "epoch": 6.12, "learning_rate": 6.279434850863422e-06, "loss": 0.2902, "step": 7241, "task_loss": 0.7316831350326538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23254571855068207, "epoch": 6.12, "learning_rate": 6.273396932737592e-06, "loss": 0.2917, "step": 7242, "task_loss": 0.6290581226348877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1637595146894455, "epoch": 6.12, "learning_rate": 6.267359014611762e-06, "loss": 0.2657, "step": 7243, "task_loss": 0.6948050856590271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4295480251312256, "epoch": 6.12, "learning_rate": 6.261321096485933e-06, "loss": 0.3328, "step": 7244, "task_loss": 0.5218859910964966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.715961217880249, "epoch": 6.12, "learning_rate": 6.255283178360102e-06, "loss": 0.4349, "step": 7245, "task_loss": 0.6192834973335266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4337505102157593, "epoch": 6.13, "learning_rate": 6.249245260234272e-06, "loss": 0.3923, "step": 7246, "task_loss": 0.6686428785324097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.49132096767425537, "epoch": 6.13, "learning_rate": 6.243207342108441e-06, "loss": 0.351, "step": 7247, "task_loss": 0.8690724968910217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46693119406700134, "epoch": 6.13, "learning_rate": 6.2371694239826115e-06, "loss": 0.3147, "step": 7248, "task_loss": 0.799595832824707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34672316908836365, "epoch": 6.13, "learning_rate": 6.23113150585678e-06, "loss": 0.2867, "step": 7249, "task_loss": 0.44889694452285767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25956404209136963, "epoch": 6.13, "learning_rate": 6.225093587730951e-06, "loss": 0.3104, "step": 7250, "task_loss": 0.7429122924804688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21489432454109192, "epoch": 6.13, "learning_rate": 6.219055669605121e-06, "loss": 0.2839, "step": 7251, "task_loss": 0.7540549039840698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36456364393234253, "epoch": 6.13, "learning_rate": 6.21301775147929e-06, "loss": 0.3657, "step": 7252, "task_loss": 0.1399783492088318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2364429533481598, "epoch": 6.13, "learning_rate": 6.20697983335346e-06, "loss": 0.3412, "step": 7253, "task_loss": 0.1473049521446228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2281879186630249, "epoch": 6.13, "learning_rate": 6.20094191522763e-06, "loss": 0.4283, "step": 7254, "task_loss": 0.7672611474990845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19145764410495758, "epoch": 6.13, "learning_rate": 6.1949039971017996e-06, "loss": 0.2399, "step": 7255, "task_loss": 0.47052091360092163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24975818395614624, "epoch": 6.13, "learning_rate": 6.188866078975969e-06, "loss": 0.3182, "step": 7256, "task_loss": 0.42114678025245667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.505205512046814, "epoch": 6.13, "learning_rate": 6.182828160850139e-06, "loss": 0.2667, "step": 7257, "task_loss": 0.6042804718017578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2744886577129364, "epoch": 6.14, "learning_rate": 6.176790242724309e-06, "loss": 0.3371, "step": 7258, "task_loss": 0.7580016255378723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21702590584754944, "epoch": 6.14, "learning_rate": 6.1707523245984785e-06, "loss": 0.1874, "step": 7259, "task_loss": 0.14909997582435608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3479987382888794, "epoch": 6.14, "learning_rate": 6.164714406472648e-06, "loss": 0.3863, "step": 7260, "task_loss": 0.6497183442115784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31531620025634766, "epoch": 6.14, "learning_rate": 6.158676488346819e-06, "loss": 0.2962, "step": 7261, "task_loss": 0.04007694125175476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1859782487154007, "epoch": 6.14, "learning_rate": 6.152638570220988e-06, "loss": 0.2432, "step": 7262, "task_loss": 0.012367150746285915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23703715205192566, "epoch": 6.14, "learning_rate": 6.146600652095158e-06, "loss": 0.3717, "step": 7263, "task_loss": 0.6828657388687134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24640652537345886, "epoch": 6.14, "learning_rate": 6.140562733969327e-06, "loss": 0.329, "step": 7264, "task_loss": 2.5697877407073975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23589833080768585, "epoch": 6.14, "learning_rate": 6.134524815843498e-06, "loss": 0.3558, "step": 7265, "task_loss": 0.7840480208396912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6107752323150635, "epoch": 6.14, "learning_rate": 6.128486897717667e-06, "loss": 0.5717, "step": 7266, "task_loss": 0.32049334049224854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26965078711509705, "epoch": 6.14, "learning_rate": 6.122448979591837e-06, "loss": 0.2094, "step": 7267, "task_loss": 0.8032212257385254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2824428081512451, "epoch": 6.14, "learning_rate": 6.116411061466007e-06, "loss": 0.2917, "step": 7268, "task_loss": 0.6629906296730042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3151687979698181, "epoch": 6.14, "learning_rate": 6.1103731433401765e-06, "loss": 0.3468, "step": 7269, "task_loss": 0.28064605593681335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33048588037490845, "epoch": 6.15, "learning_rate": 6.104335225214346e-06, "loss": 0.3682, "step": 7270, "task_loss": 0.050172314047813416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4262372851371765, "epoch": 6.15, "learning_rate": 6.098297307088517e-06, "loss": 0.3861, "step": 7271, "task_loss": 0.4774538278579712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21989303827285767, "epoch": 6.15, "learning_rate": 6.092259388962686e-06, "loss": 0.231, "step": 7272, "task_loss": 0.13859257102012634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.613686203956604, "epoch": 6.15, "learning_rate": 6.086221470836856e-06, "loss": 0.3294, "step": 7273, "task_loss": 0.4004322588443756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2132348120212555, "epoch": 6.15, "learning_rate": 6.080183552711025e-06, "loss": 0.2502, "step": 7274, "task_loss": 0.5849582552909851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2756907045841217, "epoch": 6.15, "learning_rate": 6.074145634585196e-06, "loss": 0.4037, "step": 7275, "task_loss": 0.7988424301147461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37805354595184326, "epoch": 6.15, "learning_rate": 6.0681077164593655e-06, "loss": 0.4086, "step": 7276, "task_loss": 0.982825517654419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.315887451171875, "epoch": 6.15, "learning_rate": 6.062069798333535e-06, "loss": 0.3231, "step": 7277, "task_loss": 0.28975409269332886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30292201042175293, "epoch": 6.15, "learning_rate": 6.056031880207705e-06, "loss": 0.2812, "step": 7278, "task_loss": 0.0632702186703682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26848191022872925, "epoch": 6.15, "learning_rate": 6.049993962081875e-06, "loss": 0.3238, "step": 7279, "task_loss": 0.4292033016681671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2810266613960266, "epoch": 6.15, "learning_rate": 6.043956043956044e-06, "loss": 0.2298, "step": 7280, "task_loss": 0.6890804767608643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2554793357849121, "epoch": 6.15, "learning_rate": 6.037918125830214e-06, "loss": 0.3463, "step": 7281, "task_loss": 0.5018296241760254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3187631070613861, "epoch": 6.16, "learning_rate": 6.031880207704384e-06, "loss": 0.3139, "step": 7282, "task_loss": 0.35852155089378357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23294109106063843, "epoch": 6.16, "learning_rate": 6.0258422895785535e-06, "loss": 0.3629, "step": 7283, "task_loss": 0.5957685112953186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2564356327056885, "epoch": 6.16, "learning_rate": 6.019804371452723e-06, "loss": 0.2274, "step": 7284, "task_loss": 0.4467790424823761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14529062807559967, "epoch": 6.16, "learning_rate": 6.013766453326893e-06, "loss": 0.2745, "step": 7285, "task_loss": 0.31285691261291504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23718561232089996, "epoch": 6.16, "learning_rate": 6.0077285352010635e-06, "loss": 0.2805, "step": 7286, "task_loss": 0.42459237575531006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2298784852027893, "epoch": 6.16, "learning_rate": 6.0016906170752324e-06, "loss": 0.2882, "step": 7287, "task_loss": 0.09812603145837784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3130866587162018, "epoch": 6.16, "learning_rate": 5.995652698949403e-06, "loss": 0.351, "step": 7288, "task_loss": 0.3915368914604187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30487075448036194, "epoch": 6.16, "learning_rate": 5.989614780823572e-06, "loss": 0.4218, "step": 7289, "task_loss": 1.36982262134552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3306959867477417, "epoch": 6.16, "learning_rate": 5.9835768626977424e-06, "loss": 0.314, "step": 7290, "task_loss": 0.7172718644142151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28600865602493286, "epoch": 6.16, "learning_rate": 5.977538944571911e-06, "loss": 0.3218, "step": 7291, "task_loss": 0.1658528447151184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20052072405815125, "epoch": 6.16, "learning_rate": 5.971501026446082e-06, "loss": 0.3435, "step": 7292, "task_loss": 0.4600405991077423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4624254107475281, "epoch": 6.16, "learning_rate": 5.965463108320252e-06, "loss": 0.4152, "step": 7293, "task_loss": 0.7559293508529663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19309522211551666, "epoch": 6.17, "learning_rate": 5.959425190194421e-06, "loss": 0.2978, "step": 7294, "task_loss": 0.5059049725532532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20719251036643982, "epoch": 6.17, "learning_rate": 5.953387272068591e-06, "loss": 0.2621, "step": 7295, "task_loss": 0.1657606065273285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41011086106300354, "epoch": 6.17, "learning_rate": 5.947349353942761e-06, "loss": 0.35, "step": 7296, "task_loss": 0.6476063132286072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35859739780426025, "epoch": 6.17, "learning_rate": 5.9413114358169305e-06, "loss": 0.3511, "step": 7297, "task_loss": 0.36657002568244934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47687017917633057, "epoch": 6.17, "learning_rate": 5.9352735176911e-06, "loss": 0.288, "step": 7298, "task_loss": 0.3649967908859253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2898961305618286, "epoch": 6.17, "learning_rate": 5.92923559956527e-06, "loss": 0.3175, "step": 7299, "task_loss": 0.7054463624954224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16564419865608215, "epoch": 6.17, "learning_rate": 5.92319768143944e-06, "loss": 0.2533, "step": 7300, "task_loss": 0.5708388090133667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4670461118221283, "epoch": 6.17, "learning_rate": 5.917159763313609e-06, "loss": 0.3166, "step": 7301, "task_loss": 1.0088536739349365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2061532735824585, "epoch": 6.17, "learning_rate": 5.911121845187779e-06, "loss": 0.2542, "step": 7302, "task_loss": 0.24382509291172028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23267245292663574, "epoch": 6.17, "learning_rate": 5.90508392706195e-06, "loss": 0.3336, "step": 7303, "task_loss": 0.23219886422157288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20917163789272308, "epoch": 6.17, "learning_rate": 5.899046008936119e-06, "loss": 0.347, "step": 7304, "task_loss": 0.5389791131019592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37938472628593445, "epoch": 6.17, "learning_rate": 5.893008090810289e-06, "loss": 0.4001, "step": 7305, "task_loss": 1.3187952041625977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19741781055927277, "epoch": 6.18, "learning_rate": 5.886970172684458e-06, "loss": 0.2795, "step": 7306, "task_loss": 0.017613153904676437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35122957825660706, "epoch": 6.18, "learning_rate": 5.880932254558629e-06, "loss": 0.3046, "step": 7307, "task_loss": 0.14849352836608887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5118317604064941, "epoch": 6.18, "learning_rate": 5.874894336432798e-06, "loss": 0.3028, "step": 7308, "task_loss": 1.4553170204162598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31572219729423523, "epoch": 6.18, "learning_rate": 5.868856418306968e-06, "loss": 0.3519, "step": 7309, "task_loss": 0.8357099890708923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16908767819404602, "epoch": 6.18, "learning_rate": 5.862818500181138e-06, "loss": 0.3041, "step": 7310, "task_loss": 0.18174909055233002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3928294777870178, "epoch": 6.18, "learning_rate": 5.8567805820553075e-06, "loss": 0.3232, "step": 7311, "task_loss": 0.2569441795349121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3333682119846344, "epoch": 6.18, "learning_rate": 5.850742663929477e-06, "loss": 0.3302, "step": 7312, "task_loss": 0.4108341336250305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2158995270729065, "epoch": 6.18, "learning_rate": 5.844704745803648e-06, "loss": 0.3089, "step": 7313, "task_loss": 0.14969070255756378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.275423526763916, "epoch": 6.18, "learning_rate": 5.838666827677817e-06, "loss": 0.3356, "step": 7314, "task_loss": 0.2617095410823822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32542774081230164, "epoch": 6.18, "learning_rate": 5.832628909551987e-06, "loss": 0.3075, "step": 7315, "task_loss": 0.6572245955467224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33283281326293945, "epoch": 6.18, "learning_rate": 5.826590991426156e-06, "loss": 0.3291, "step": 7316, "task_loss": 0.3912583887577057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4048759341239929, "epoch": 6.19, "learning_rate": 5.820553073300327e-06, "loss": 0.3728, "step": 7317, "task_loss": 0.2754739224910736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17610880732536316, "epoch": 6.19, "learning_rate": 5.814515155174496e-06, "loss": 0.2816, "step": 7318, "task_loss": 0.5135335922241211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.291262149810791, "epoch": 6.19, "learning_rate": 5.808477237048666e-06, "loss": 0.3029, "step": 7319, "task_loss": 0.589095950126648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3340892195701599, "epoch": 6.19, "learning_rate": 5.802439318922836e-06, "loss": 0.3011, "step": 7320, "task_loss": 0.914637565612793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36775338649749756, "epoch": 6.19, "learning_rate": 5.796401400797006e-06, "loss": 0.3759, "step": 7321, "task_loss": 1.932281494140625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2765195369720459, "epoch": 6.19, "learning_rate": 5.790363482671175e-06, "loss": 0.2972, "step": 7322, "task_loss": 0.888322114944458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27683380246162415, "epoch": 6.19, "learning_rate": 5.784325564545345e-06, "loss": 0.331, "step": 7323, "task_loss": 0.5994772911071777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23757082223892212, "epoch": 6.19, "learning_rate": 5.778287646419515e-06, "loss": 0.3339, "step": 7324, "task_loss": 0.2903706729412079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35454261302948, "epoch": 6.19, "learning_rate": 5.7722497282936845e-06, "loss": 0.3084, "step": 7325, "task_loss": 0.626806914806366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1453142613172531, "epoch": 6.19, "learning_rate": 5.766211810167854e-06, "loss": 0.3164, "step": 7326, "task_loss": 0.1537088304758072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22148647904396057, "epoch": 6.19, "learning_rate": 5.760173892042024e-06, "loss": 0.2144, "step": 7327, "task_loss": 0.1297454982995987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18414682149887085, "epoch": 6.19, "learning_rate": 5.7541359739161945e-06, "loss": 0.2586, "step": 7328, "task_loss": 0.5567471385002136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25418150424957275, "epoch": 6.2, "learning_rate": 5.748098055790363e-06, "loss": 0.3215, "step": 7329, "task_loss": 0.6419572234153748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25726619362831116, "epoch": 6.2, "learning_rate": 5.742060137664534e-06, "loss": 0.3244, "step": 7330, "task_loss": 0.2954053580760956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2274969220161438, "epoch": 6.2, "learning_rate": 5.736022219538703e-06, "loss": 0.2642, "step": 7331, "task_loss": 0.4663292169570923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.48708468675613403, "epoch": 6.2, "learning_rate": 5.729984301412873e-06, "loss": 0.3899, "step": 7332, "task_loss": 0.6017398238182068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33334511518478394, "epoch": 6.2, "learning_rate": 5.723946383287043e-06, "loss": 0.3213, "step": 7333, "task_loss": 0.7636258006095886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24741074442863464, "epoch": 6.2, "learning_rate": 5.717908465161213e-06, "loss": 0.2792, "step": 7334, "task_loss": 0.5674071311950684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29293328523635864, "epoch": 6.2, "learning_rate": 5.7118705470353826e-06, "loss": 0.307, "step": 7335, "task_loss": 0.8693770170211792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1464649736881256, "epoch": 6.2, "learning_rate": 5.705832628909552e-06, "loss": 0.2471, "step": 7336, "task_loss": 0.05505220592021942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2569911479949951, "epoch": 6.2, "learning_rate": 5.699794710783722e-06, "loss": 0.3052, "step": 7337, "task_loss": 0.3912438452243805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3039405345916748, "epoch": 6.2, "learning_rate": 5.693756792657892e-06, "loss": 0.298, "step": 7338, "task_loss": 0.7213567495346069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1569645255804062, "epoch": 6.2, "learning_rate": 5.6877188745320615e-06, "loss": 0.2419, "step": 7339, "task_loss": 0.5214962363243103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.09557930380105972, "epoch": 6.2, "learning_rate": 5.681680956406232e-06, "loss": 0.292, "step": 7340, "task_loss": 0.28290900588035583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.204722598195076, "epoch": 6.21, "learning_rate": 5.675643038280401e-06, "loss": 0.2871, "step": 7341, "task_loss": 0.46198248863220215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3507539629936218, "epoch": 6.21, "learning_rate": 5.6696051201545715e-06, "loss": 0.287, "step": 7342, "task_loss": 0.785375714302063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.13380557298660278, "epoch": 6.21, "learning_rate": 5.66356720202874e-06, "loss": 0.2541, "step": 7343, "task_loss": 0.22475524246692657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.312081515789032, "epoch": 6.21, "learning_rate": 5.65752928390291e-06, "loss": 0.2647, "step": 7344, "task_loss": 0.39534294605255127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37430405616760254, "epoch": 6.21, "learning_rate": 5.651491365777081e-06, "loss": 0.3439, "step": 7345, "task_loss": 0.863775908946991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15402404963970184, "epoch": 6.21, "learning_rate": 5.6454534476512495e-06, "loss": 0.3235, "step": 7346, "task_loss": 0.4074091911315918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2600761651992798, "epoch": 6.21, "learning_rate": 5.63941552952542e-06, "loss": 0.2698, "step": 7347, "task_loss": 0.46591684222221375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15354248881340027, "epoch": 6.21, "learning_rate": 5.633377611399589e-06, "loss": 0.2884, "step": 7348, "task_loss": 0.16337674856185913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4145463705062866, "epoch": 6.21, "learning_rate": 5.6273396932737596e-06, "loss": 0.3399, "step": 7349, "task_loss": 1.2764108180999756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.13737742602825165, "epoch": 6.21, "learning_rate": 5.621301775147929e-06, "loss": 0.2826, "step": 7350, "task_loss": 0.13239145278930664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20552793145179749, "epoch": 6.21, "learning_rate": 5.615263857022099e-06, "loss": 0.3143, "step": 7351, "task_loss": 1.0703543424606323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6504502892494202, "epoch": 6.21, "learning_rate": 5.609225938896269e-06, "loss": 0.4946, "step": 7352, "task_loss": 0.872931718826294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38477224111557007, "epoch": 6.22, "learning_rate": 5.6031880207704385e-06, "loss": 0.4141, "step": 7353, "task_loss": 0.8193022608757019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46135297417640686, "epoch": 6.22, "learning_rate": 5.597150102644608e-06, "loss": 0.4528, "step": 7354, "task_loss": 0.8058938980102539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.586283802986145, "epoch": 6.22, "learning_rate": 5.591112184518779e-06, "loss": 0.4005, "step": 7355, "task_loss": 0.6859341263771057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22900807857513428, "epoch": 6.22, "learning_rate": 5.585074266392948e-06, "loss": 0.2916, "step": 7356, "task_loss": 0.5716953277587891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2773395776748657, "epoch": 6.22, "learning_rate": 5.579036348267118e-06, "loss": 0.3247, "step": 7357, "task_loss": 0.5333839654922485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32248586416244507, "epoch": 6.22, "learning_rate": 5.572998430141287e-06, "loss": 0.2631, "step": 7358, "task_loss": 0.3725232183933258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3534919023513794, "epoch": 6.22, "learning_rate": 5.566960512015458e-06, "loss": 0.3082, "step": 7359, "task_loss": 0.8104996681213379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23192831873893738, "epoch": 6.22, "learning_rate": 5.560922593889627e-06, "loss": 0.235, "step": 7360, "task_loss": 0.4466322362422943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5983520746231079, "epoch": 6.22, "learning_rate": 5.554884675763797e-06, "loss": 0.405, "step": 7361, "task_loss": 0.6729022860527039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3218199908733368, "epoch": 6.22, "learning_rate": 5.548846757637967e-06, "loss": 0.2623, "step": 7362, "task_loss": 0.5339111089706421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3091704845428467, "epoch": 6.22, "learning_rate": 5.5428088395121365e-06, "loss": 0.3256, "step": 7363, "task_loss": 0.4057878851890564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4450223743915558, "epoch": 6.22, "learning_rate": 5.536770921386306e-06, "loss": 0.3511, "step": 7364, "task_loss": 0.855107307434082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46804893016815186, "epoch": 6.23, "learning_rate": 5.530733003260476e-06, "loss": 0.3859, "step": 7365, "task_loss": 0.506304144859314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.08045507967472076, "epoch": 6.23, "learning_rate": 5.524695085134646e-06, "loss": 0.3077, "step": 7366, "task_loss": 0.047819092869758606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39093899726867676, "epoch": 6.23, "learning_rate": 5.5186571670088154e-06, "loss": 0.2872, "step": 7367, "task_loss": 0.6804456114768982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14606916904449463, "epoch": 6.23, "learning_rate": 5.512619248882985e-06, "loss": 0.2603, "step": 7368, "task_loss": 0.2343638837337494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30323371291160583, "epoch": 6.23, "learning_rate": 5.506581330757155e-06, "loss": 0.309, "step": 7369, "task_loss": 0.6385443210601807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.290368914604187, "epoch": 6.23, "learning_rate": 5.5005434126313255e-06, "loss": 0.2832, "step": 7370, "task_loss": 0.3468688726425171 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4670025706291199, "epoch": 6.23, "learning_rate": 5.494505494505494e-06, "loss": 0.3801, "step": 7371, "task_loss": 0.511773943901062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28642913699150085, "epoch": 6.23, "learning_rate": 5.488467576379665e-06, "loss": 0.3661, "step": 7372, "task_loss": 0.2726462781429291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6968756914138794, "epoch": 6.23, "learning_rate": 5.482429658253834e-06, "loss": 0.4152, "step": 7373, "task_loss": 1.1584017276763916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2794973850250244, "epoch": 6.23, "learning_rate": 5.476391740128004e-06, "loss": 0.2852, "step": 7374, "task_loss": 0.3126874268054962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28439536690711975, "epoch": 6.23, "learning_rate": 5.470353822002174e-06, "loss": 0.2753, "step": 7375, "task_loss": 0.5314283967018127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.389899343252182, "epoch": 6.23, "learning_rate": 5.464315903876344e-06, "loss": 0.3621, "step": 7376, "task_loss": 1.044019341468811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3940943777561188, "epoch": 6.24, "learning_rate": 5.4582779857505135e-06, "loss": 0.3707, "step": 7377, "task_loss": 0.43387678265571594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1977355033159256, "epoch": 6.24, "learning_rate": 5.452240067624683e-06, "loss": 0.3575, "step": 7378, "task_loss": 0.8477073311805725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2532358467578888, "epoch": 6.24, "learning_rate": 5.446202149498853e-06, "loss": 0.3308, "step": 7379, "task_loss": 0.6158466935157776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24112243950366974, "epoch": 6.24, "learning_rate": 5.440164231373023e-06, "loss": 0.2609, "step": 7380, "task_loss": 0.5651446580886841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2879619598388672, "epoch": 6.24, "learning_rate": 5.434126313247192e-06, "loss": 0.2813, "step": 7381, "task_loss": 1.0842636823654175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3108000159263611, "epoch": 6.24, "learning_rate": 5.428088395121363e-06, "loss": 0.3574, "step": 7382, "task_loss": 0.3897767663002014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25881144404411316, "epoch": 6.24, "learning_rate": 5.422050476995532e-06, "loss": 0.322, "step": 7383, "task_loss": 0.8448782563209534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18522438406944275, "epoch": 6.24, "learning_rate": 5.4160125588697024e-06, "loss": 0.2861, "step": 7384, "task_loss": 0.07900186628103256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2043771743774414, "epoch": 6.24, "learning_rate": 5.409974640743871e-06, "loss": 0.2909, "step": 7385, "task_loss": 0.6971206665039062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33751219511032104, "epoch": 6.24, "learning_rate": 5.403936722618042e-06, "loss": 0.2493, "step": 7386, "task_loss": 1.002614140510559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2826710045337677, "epoch": 6.24, "learning_rate": 5.397898804492212e-06, "loss": 0.3484, "step": 7387, "task_loss": 0.8350661993026733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30110597610473633, "epoch": 6.24, "learning_rate": 5.391860886366381e-06, "loss": 0.3408, "step": 7388, "task_loss": 0.21129849553108215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23367363214492798, "epoch": 6.25, "learning_rate": 5.385822968240551e-06, "loss": 0.2863, "step": 7389, "task_loss": 0.4775119721889496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5299674272537231, "epoch": 6.25, "learning_rate": 5.379785050114721e-06, "loss": 0.3359, "step": 7390, "task_loss": 0.16968591511249542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3299417495727539, "epoch": 6.25, "learning_rate": 5.3737471319888905e-06, "loss": 0.3087, "step": 7391, "task_loss": 0.6261822581291199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2472228854894638, "epoch": 6.25, "learning_rate": 5.36770921386306e-06, "loss": 0.2798, "step": 7392, "task_loss": 0.7653905749320984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3857782185077667, "epoch": 6.25, "learning_rate": 5.36167129573723e-06, "loss": 0.3644, "step": 7393, "task_loss": 0.9515523910522461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25601696968078613, "epoch": 6.25, "learning_rate": 5.3556333776114e-06, "loss": 0.3255, "step": 7394, "task_loss": 0.6962659358978271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30715012550354004, "epoch": 6.25, "learning_rate": 5.349595459485569e-06, "loss": 0.2435, "step": 7395, "task_loss": 0.3852241635322571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1798027604818344, "epoch": 6.25, "learning_rate": 5.343557541359739e-06, "loss": 0.2623, "step": 7396, "task_loss": 0.12510034441947937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37028294801712036, "epoch": 6.25, "learning_rate": 5.33751962323391e-06, "loss": 0.2944, "step": 7397, "task_loss": 0.8589933514595032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4038069248199463, "epoch": 6.25, "learning_rate": 5.3314817051080786e-06, "loss": 0.3359, "step": 7398, "task_loss": 0.06791121512651443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4065922498703003, "epoch": 6.25, "learning_rate": 5.325443786982249e-06, "loss": 0.3598, "step": 7399, "task_loss": 0.9732213616371155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20025712251663208, "epoch": 6.26, "learning_rate": 5.319405868856418e-06, "loss": 0.3053, "step": 7400, "task_loss": 0.7578831911087036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35344260931015015, "epoch": 6.26, "learning_rate": 5.313367950730589e-06, "loss": 0.3259, "step": 7401, "task_loss": 0.35862013697624207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27752500772476196, "epoch": 6.26, "learning_rate": 5.307330032604758e-06, "loss": 0.2763, "step": 7402, "task_loss": 0.32035982608795166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16206924617290497, "epoch": 6.26, "learning_rate": 5.301292114478928e-06, "loss": 0.2091, "step": 7403, "task_loss": 0.36115124821662903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20803004503250122, "epoch": 6.26, "learning_rate": 5.295254196353098e-06, "loss": 0.2414, "step": 7404, "task_loss": 0.19780537486076355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22105370461940765, "epoch": 6.26, "learning_rate": 5.2892162782272675e-06, "loss": 0.322, "step": 7405, "task_loss": 0.505027711391449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1602451503276825, "epoch": 6.26, "learning_rate": 5.283178360101437e-06, "loss": 0.2886, "step": 7406, "task_loss": 0.8240736126899719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2562323808670044, "epoch": 6.26, "learning_rate": 5.277140441975608e-06, "loss": 0.2081, "step": 7407, "task_loss": 0.2668737769126892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30449020862579346, "epoch": 6.26, "learning_rate": 5.271102523849777e-06, "loss": 0.4034, "step": 7408, "task_loss": 0.40988633036613464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4016094207763672, "epoch": 6.26, "learning_rate": 5.265064605723946e-06, "loss": 0.3122, "step": 7409, "task_loss": 0.7402150630950928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2982572317123413, "epoch": 6.26, "learning_rate": 5.259026687598116e-06, "loss": 0.3911, "step": 7410, "task_loss": 0.5343198180198669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3389758765697479, "epoch": 6.26, "learning_rate": 5.252988769472286e-06, "loss": 0.308, "step": 7411, "task_loss": 0.7520685791969299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2297784686088562, "epoch": 6.27, "learning_rate": 5.246950851346456e-06, "loss": 0.2918, "step": 7412, "task_loss": 0.7200822234153748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4071407914161682, "epoch": 6.27, "learning_rate": 5.240912933220625e-06, "loss": 0.4871, "step": 7413, "task_loss": 1.232882022857666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1775471568107605, "epoch": 6.27, "learning_rate": 5.234875015094796e-06, "loss": 0.2456, "step": 7414, "task_loss": 0.3934509754180908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2367095649242401, "epoch": 6.27, "learning_rate": 5.228837096968965e-06, "loss": 0.2824, "step": 7415, "task_loss": 0.3414698839187622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4775612950325012, "epoch": 6.27, "learning_rate": 5.222799178843135e-06, "loss": 0.4255, "step": 7416, "task_loss": 1.4220333099365234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4715122878551483, "epoch": 6.27, "learning_rate": 5.216761260717305e-06, "loss": 0.3985, "step": 7417, "task_loss": 0.4332817494869232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23972058296203613, "epoch": 6.27, "learning_rate": 5.210723342591475e-06, "loss": 0.2186, "step": 7418, "task_loss": 0.13782364130020142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14683325588703156, "epoch": 6.27, "learning_rate": 5.2046854244656445e-06, "loss": 0.2765, "step": 7419, "task_loss": 1.0278375148773193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5771911144256592, "epoch": 6.27, "learning_rate": 5.198647506339814e-06, "loss": 0.4322, "step": 7420, "task_loss": 1.408897042274475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2146473079919815, "epoch": 6.27, "learning_rate": 5.192609588213984e-06, "loss": 0.37, "step": 7421, "task_loss": 0.5135196447372437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24815648794174194, "epoch": 6.27, "learning_rate": 5.1865716700881545e-06, "loss": 0.2949, "step": 7422, "task_loss": 0.32716798782348633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23091578483581543, "epoch": 6.27, "learning_rate": 5.180533751962323e-06, "loss": 0.219, "step": 7423, "task_loss": 0.26198479533195496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21926575899124146, "epoch": 6.28, "learning_rate": 5.174495833836494e-06, "loss": 0.2766, "step": 7424, "task_loss": 0.40542298555374146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3126923739910126, "epoch": 6.28, "learning_rate": 5.168457915710663e-06, "loss": 0.3122, "step": 7425, "task_loss": 1.265037178993225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2212328016757965, "epoch": 6.28, "learning_rate": 5.162419997584833e-06, "loss": 0.2356, "step": 7426, "task_loss": 0.35704392194747925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33999037742614746, "epoch": 6.28, "learning_rate": 5.156382079459002e-06, "loss": 0.2764, "step": 7427, "task_loss": 0.5955678224563599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18909664452075958, "epoch": 6.28, "learning_rate": 5.150344161333173e-06, "loss": 0.264, "step": 7428, "task_loss": 0.21428492665290833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14913907647132874, "epoch": 6.28, "learning_rate": 5.1443062432073426e-06, "loss": 0.2462, "step": 7429, "task_loss": 0.038949351757764816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14004558324813843, "epoch": 6.28, "learning_rate": 5.138268325081512e-06, "loss": 0.2621, "step": 7430, "task_loss": 0.2724246680736542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38407352566719055, "epoch": 6.28, "learning_rate": 5.132230406955682e-06, "loss": 0.2971, "step": 7431, "task_loss": 0.3742440938949585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5022674798965454, "epoch": 6.28, "learning_rate": 5.126192488829852e-06, "loss": 0.3494, "step": 7432, "task_loss": 0.4523394703865051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.12465033680200577, "epoch": 6.28, "learning_rate": 5.1201545707040215e-06, "loss": 0.2955, "step": 7433, "task_loss": 0.3034924268722534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37824535369873047, "epoch": 6.28, "learning_rate": 5.114116652578191e-06, "loss": 0.3631, "step": 7434, "task_loss": 0.19379116594791412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16694410145282745, "epoch": 6.28, "learning_rate": 5.108078734452361e-06, "loss": 0.2852, "step": 7435, "task_loss": 0.049470990896224976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17570167779922485, "epoch": 6.29, "learning_rate": 5.102040816326531e-06, "loss": 0.2063, "step": 7436, "task_loss": 0.1269269734621048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25855061411857605, "epoch": 6.29, "learning_rate": 5.0960028982007e-06, "loss": 0.3354, "step": 7437, "task_loss": 0.2675301134586334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42163676023483276, "epoch": 6.29, "learning_rate": 5.08996498007487e-06, "loss": 0.3968, "step": 7438, "task_loss": 0.14387503266334534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3780311346054077, "epoch": 6.29, "learning_rate": 5.083927061949041e-06, "loss": 0.3205, "step": 7439, "task_loss": 0.34937721490859985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37655413150787354, "epoch": 6.29, "learning_rate": 5.0778891438232095e-06, "loss": 0.2796, "step": 7440, "task_loss": 0.7139735221862793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30158543586730957, "epoch": 6.29, "learning_rate": 5.07185122569738e-06, "loss": 0.3135, "step": 7441, "task_loss": 0.42839643359184265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36790329217910767, "epoch": 6.29, "learning_rate": 5.065813307571549e-06, "loss": 0.3831, "step": 7442, "task_loss": 0.9910699725151062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33132749795913696, "epoch": 6.29, "learning_rate": 5.0597753894457195e-06, "loss": 0.4196, "step": 7443, "task_loss": 0.9735628366470337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21953454613685608, "epoch": 6.29, "learning_rate": 5.053737471319889e-06, "loss": 0.2922, "step": 7444, "task_loss": 0.027351638302206993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1593240201473236, "epoch": 6.29, "learning_rate": 5.047699553194059e-06, "loss": 0.3565, "step": 7445, "task_loss": 0.21341830492019653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23186053335666656, "epoch": 6.29, "learning_rate": 5.041661635068229e-06, "loss": 0.2569, "step": 7446, "task_loss": 0.08873025327920914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2534548044204712, "epoch": 6.29, "learning_rate": 5.0356237169423984e-06, "loss": 0.3347, "step": 7447, "task_loss": 0.1527668684720993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36628174781799316, "epoch": 6.3, "learning_rate": 5.029585798816568e-06, "loss": 0.3298, "step": 7448, "task_loss": 0.8901467323303223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3736328184604645, "epoch": 6.3, "learning_rate": 5.023547880690739e-06, "loss": 0.3083, "step": 7449, "task_loss": 0.8277965188026428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45770949125289917, "epoch": 6.3, "learning_rate": 5.017509962564908e-06, "loss": 0.3887, "step": 7450, "task_loss": 0.9934028387069702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36371850967407227, "epoch": 6.3, "learning_rate": 5.011472044439078e-06, "loss": 0.3462, "step": 7451, "task_loss": 0.4706520438194275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2084750235080719, "epoch": 6.3, "learning_rate": 5.005434126313247e-06, "loss": 0.3363, "step": 7452, "task_loss": 0.11791546642780304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23237013816833496, "epoch": 6.3, "learning_rate": 4.999396208187418e-06, "loss": 0.2825, "step": 7453, "task_loss": 0.3885297477245331 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3151185214519501, "epoch": 6.3, "learning_rate": 4.993358290061587e-06, "loss": 0.3522, "step": 7454, "task_loss": 0.6624180674552917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4527793824672699, "epoch": 6.3, "learning_rate": 4.987320371935757e-06, "loss": 0.3508, "step": 7455, "task_loss": 0.9937955737113953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30184781551361084, "epoch": 6.3, "learning_rate": 4.981282453809927e-06, "loss": 0.3113, "step": 7456, "task_loss": 0.2558978199958801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5412284135818481, "epoch": 6.3, "learning_rate": 4.9752445356840965e-06, "loss": 0.3769, "step": 7457, "task_loss": 0.9833180904388428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2547963857650757, "epoch": 6.3, "learning_rate": 4.969206617558266e-06, "loss": 0.3433, "step": 7458, "task_loss": 0.3682478070259094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2928915321826935, "epoch": 6.3, "learning_rate": 4.963168699432436e-06, "loss": 0.3133, "step": 7459, "task_loss": 0.6987941861152649 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40525299310684204, "epoch": 6.31, "learning_rate": 4.957130781306606e-06, "loss": 0.322, "step": 7460, "task_loss": 0.4425482749938965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28083378076553345, "epoch": 6.31, "learning_rate": 4.9510928631807754e-06, "loss": 0.2424, "step": 7461, "task_loss": 0.6496677398681641 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3268284201622009, "epoch": 6.31, "learning_rate": 4.945054945054945e-06, "loss": 0.3106, "step": 7462, "task_loss": 0.5334630012512207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18545271456241608, "epoch": 6.31, "learning_rate": 4.939017026929115e-06, "loss": 0.4172, "step": 7463, "task_loss": 0.04510686546564102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16718143224716187, "epoch": 6.31, "learning_rate": 4.9329791088032854e-06, "loss": 0.2114, "step": 7464, "task_loss": 0.6714086532592773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21488721668720245, "epoch": 6.31, "learning_rate": 4.926941190677454e-06, "loss": 0.3246, "step": 7465, "task_loss": 0.5144777894020081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18049761652946472, "epoch": 6.31, "learning_rate": 4.920903272551625e-06, "loss": 0.3581, "step": 7466, "task_loss": 0.7006081342697144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21457399427890778, "epoch": 6.31, "learning_rate": 4.914865354425794e-06, "loss": 0.2736, "step": 7467, "task_loss": 0.6088438630104065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38806018233299255, "epoch": 6.31, "learning_rate": 4.908827436299964e-06, "loss": 0.3092, "step": 7468, "task_loss": 0.6884649395942688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23294708132743835, "epoch": 6.31, "learning_rate": 4.902789518174133e-06, "loss": 0.3267, "step": 7469, "task_loss": 0.14178378880023956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15761572122573853, "epoch": 6.31, "learning_rate": 4.896751600048304e-06, "loss": 0.325, "step": 7470, "task_loss": 0.10209442675113678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29013991355895996, "epoch": 6.32, "learning_rate": 4.8907136819224735e-06, "loss": 0.3449, "step": 7471, "task_loss": 0.6840222477912903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22078578174114227, "epoch": 6.32, "learning_rate": 4.884675763796643e-06, "loss": 0.2721, "step": 7472, "task_loss": 0.2982255518436432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2751384973526001, "epoch": 6.32, "learning_rate": 4.878637845670813e-06, "loss": 0.3935, "step": 7473, "task_loss": 0.7233682870864868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.436810165643692, "epoch": 6.32, "learning_rate": 4.872599927544983e-06, "loss": 0.3459, "step": 7474, "task_loss": 0.7392990589141846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31968504190444946, "epoch": 6.32, "learning_rate": 4.866562009419152e-06, "loss": 0.3613, "step": 7475, "task_loss": 0.9126065969467163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3945504426956177, "epoch": 6.32, "learning_rate": 4.860524091293322e-06, "loss": 0.2993, "step": 7476, "task_loss": 0.16821055114269257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21930691599845886, "epoch": 6.32, "learning_rate": 4.854486173167492e-06, "loss": 0.3052, "step": 7477, "task_loss": 0.20312514901161194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14452366530895233, "epoch": 6.32, "learning_rate": 4.848448255041662e-06, "loss": 0.2275, "step": 7478, "task_loss": 0.24136976897716522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6342542171478271, "epoch": 6.32, "learning_rate": 4.842410336915831e-06, "loss": 0.3333, "step": 7479, "task_loss": 1.0283609628677368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30738335847854614, "epoch": 6.32, "learning_rate": 4.836372418790001e-06, "loss": 0.3724, "step": 7480, "task_loss": 0.8990968465805054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14365771412849426, "epoch": 6.32, "learning_rate": 4.830334500664172e-06, "loss": 0.2393, "step": 7481, "task_loss": 0.4231239855289459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2619793117046356, "epoch": 6.32, "learning_rate": 4.8242965825383405e-06, "loss": 0.3505, "step": 7482, "task_loss": 0.5848236083984375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23235175013542175, "epoch": 6.33, "learning_rate": 4.818258664412511e-06, "loss": 0.2836, "step": 7483, "task_loss": 0.7992692589759827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30387425422668457, "epoch": 6.33, "learning_rate": 4.81222074628668e-06, "loss": 0.2939, "step": 7484, "task_loss": 0.8301934003829956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6070037484169006, "epoch": 6.33, "learning_rate": 4.8061828281608505e-06, "loss": 0.2883, "step": 7485, "task_loss": 0.6052541732788086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25087985396385193, "epoch": 6.33, "learning_rate": 4.80014491003502e-06, "loss": 0.2846, "step": 7486, "task_loss": 0.4343230128288269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21697023510932922, "epoch": 6.33, "learning_rate": 4.79410699190919e-06, "loss": 0.2909, "step": 7487, "task_loss": 0.7106451988220215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18622438609600067, "epoch": 6.33, "learning_rate": 4.78806907378336e-06, "loss": 0.3366, "step": 7488, "task_loss": 0.25719746947288513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18352310359477997, "epoch": 6.33, "learning_rate": 4.782031155657529e-06, "loss": 0.2696, "step": 7489, "task_loss": 0.018977632746100426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42180365324020386, "epoch": 6.33, "learning_rate": 4.775993237531699e-06, "loss": 0.3554, "step": 7490, "task_loss": 0.2160477489233017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2185802310705185, "epoch": 6.33, "learning_rate": 4.76995531940587e-06, "loss": 0.3546, "step": 7491, "task_loss": 0.25241124629974365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2286207675933838, "epoch": 6.33, "learning_rate": 4.7639174012800386e-06, "loss": 0.2302, "step": 7492, "task_loss": 0.3469492197036743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24661637842655182, "epoch": 6.33, "learning_rate": 4.757879483154209e-06, "loss": 0.249, "step": 7493, "task_loss": 0.2970849275588989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19797095656394958, "epoch": 6.33, "learning_rate": 4.751841565028378e-06, "loss": 0.2745, "step": 7494, "task_loss": 0.37985867261886597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18113312125205994, "epoch": 6.34, "learning_rate": 4.745803646902549e-06, "loss": 0.3036, "step": 7495, "task_loss": 0.9065543413162231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2141997069120407, "epoch": 6.34, "learning_rate": 4.739765728776718e-06, "loss": 0.2789, "step": 7496, "task_loss": 0.6669430136680603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26633065938949585, "epoch": 6.34, "learning_rate": 4.733727810650888e-06, "loss": 0.3141, "step": 7497, "task_loss": 0.49598830938339233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1677575707435608, "epoch": 6.34, "learning_rate": 4.727689892525058e-06, "loss": 0.3469, "step": 7498, "task_loss": 0.35596299171447754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40699654817581177, "epoch": 6.34, "learning_rate": 4.7216519743992275e-06, "loss": 0.3484, "step": 7499, "task_loss": 0.3532182276248932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36664801836013794, "epoch": 6.34, "learning_rate": 4.715614056273397e-06, "loss": 0.3032, "step": 7500, "task_loss": 0.5480842590332031 }, { "epoch": 6.34, "eval_accuracy": 0.9200792079207921, "eval_loss": 0.2035595029592514, "eval_runtime": 317.505, "eval_samples_per_second": 79.526, "eval_steps_per_second": 0.624, "step": 7500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30493155121803284, "epoch": 6.34, "learning_rate": 4.709576138147567e-06, "loss": 0.2962, "step": 7501, "task_loss": 0.9281776547431946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4038947820663452, "epoch": 6.34, "learning_rate": 4.703538220021737e-06, "loss": 0.3358, "step": 7502, "task_loss": 0.5104796886444092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2969402074813843, "epoch": 6.34, "learning_rate": 4.697500301895906e-06, "loss": 0.2653, "step": 7503, "task_loss": 0.8178244829177856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14959238469600677, "epoch": 6.34, "learning_rate": 4.691462383770076e-06, "loss": 0.3119, "step": 7504, "task_loss": 0.24561651051044464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23381298780441284, "epoch": 6.34, "learning_rate": 4.685424465644246e-06, "loss": 0.2657, "step": 7505, "task_loss": 0.19420084357261658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3849567770957947, "epoch": 6.34, "learning_rate": 4.679386547518416e-06, "loss": 0.303, "step": 7506, "task_loss": 0.6765482425689697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.13592366874217987, "epoch": 6.35, "learning_rate": 4.673348629392585e-06, "loss": 0.3248, "step": 7507, "task_loss": 0.04251393675804138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24718312919139862, "epoch": 6.35, "learning_rate": 4.667310711266756e-06, "loss": 0.3145, "step": 7508, "task_loss": 0.8147677779197693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17135977745056152, "epoch": 6.35, "learning_rate": 4.661272793140925e-06, "loss": 0.2956, "step": 7509, "task_loss": 0.11605851352214813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4197413921356201, "epoch": 6.35, "learning_rate": 4.655234875015095e-06, "loss": 0.3578, "step": 7510, "task_loss": 0.7057867646217346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19640687108039856, "epoch": 6.35, "learning_rate": 4.649196956889264e-06, "loss": 0.2693, "step": 7511, "task_loss": 0.7779016494750977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20966342091560364, "epoch": 6.35, "learning_rate": 4.643159038763435e-06, "loss": 0.2897, "step": 7512, "task_loss": 0.10676931589841843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2094999998807907, "epoch": 6.35, "learning_rate": 4.6371211206376045e-06, "loss": 0.2772, "step": 7513, "task_loss": 0.5195332765579224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2327573597431183, "epoch": 6.35, "learning_rate": 4.631083202511774e-06, "loss": 0.2807, "step": 7514, "task_loss": 0.3305765986442566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18696914613246918, "epoch": 6.35, "learning_rate": 4.625045284385944e-06, "loss": 0.2317, "step": 7515, "task_loss": 0.4105018377304077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27765193581581116, "epoch": 6.35, "learning_rate": 4.619007366260114e-06, "loss": 0.3207, "step": 7516, "task_loss": 0.5961814522743225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17062689363956451, "epoch": 6.35, "learning_rate": 4.612969448134283e-06, "loss": 0.295, "step": 7517, "task_loss": 0.30715513229370117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31954294443130493, "epoch": 6.35, "learning_rate": 4.606931530008454e-06, "loss": 0.2881, "step": 7518, "task_loss": 0.3773168623447418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2624664902687073, "epoch": 6.36, "learning_rate": 4.600893611882623e-06, "loss": 0.2685, "step": 7519, "task_loss": 0.13382868468761444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3585376739501953, "epoch": 6.36, "learning_rate": 4.594855693756793e-06, "loss": 0.4433, "step": 7520, "task_loss": 0.4958493709564209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27550750970840454, "epoch": 6.36, "learning_rate": 4.588817775630962e-06, "loss": 0.3414, "step": 7521, "task_loss": 0.5583347678184509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35394930839538574, "epoch": 6.36, "learning_rate": 4.582779857505133e-06, "loss": 0.3556, "step": 7522, "task_loss": 0.775375485420227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1527116596698761, "epoch": 6.36, "learning_rate": 4.5767419393793026e-06, "loss": 0.3318, "step": 7523, "task_loss": 0.02786460518836975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28847482800483704, "epoch": 6.36, "learning_rate": 4.570704021253472e-06, "loss": 0.2903, "step": 7524, "task_loss": 0.4424231946468353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1607355922460556, "epoch": 6.36, "learning_rate": 4.564666103127642e-06, "loss": 0.3176, "step": 7525, "task_loss": 0.5967807769775391 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3989959955215454, "epoch": 6.36, "learning_rate": 4.558628185001811e-06, "loss": 0.3453, "step": 7526, "task_loss": 1.009029746055603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33005961775779724, "epoch": 6.36, "learning_rate": 4.5525902668759815e-06, "loss": 0.3039, "step": 7527, "task_loss": 0.1949731409549713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1435426026582718, "epoch": 6.36, "learning_rate": 4.546552348750151e-06, "loss": 0.2729, "step": 7528, "task_loss": 0.4392089545726776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29769963026046753, "epoch": 6.36, "learning_rate": 4.540514430624321e-06, "loss": 0.3007, "step": 7529, "task_loss": 0.9871595501899719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3698617219924927, "epoch": 6.36, "learning_rate": 4.534476512498491e-06, "loss": 0.319, "step": 7530, "task_loss": 0.1453026980161667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18315255641937256, "epoch": 6.37, "learning_rate": 4.52843859437266e-06, "loss": 0.2885, "step": 7531, "task_loss": 0.2272672951221466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3922114372253418, "epoch": 6.37, "learning_rate": 4.52240067624683e-06, "loss": 0.3785, "step": 7532, "task_loss": 0.9383247494697571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.427988737821579, "epoch": 6.37, "learning_rate": 4.516362758121001e-06, "loss": 0.3669, "step": 7533, "task_loss": 0.38069799542427063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24319396913051605, "epoch": 6.37, "learning_rate": 4.5103248399951695e-06, "loss": 0.3131, "step": 7534, "task_loss": 0.4739033579826355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4190860688686371, "epoch": 6.37, "learning_rate": 4.50428692186934e-06, "loss": 0.357, "step": 7535, "task_loss": 0.6699467301368713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.13817594945430756, "epoch": 6.37, "learning_rate": 4.498249003743509e-06, "loss": 0.2904, "step": 7536, "task_loss": 0.11876649409532547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14934927225112915, "epoch": 6.37, "learning_rate": 4.4922110856176795e-06, "loss": 0.3001, "step": 7537, "task_loss": 0.1713978797197342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41087043285369873, "epoch": 6.37, "learning_rate": 4.486173167491849e-06, "loss": 0.4018, "step": 7538, "task_loss": 0.7672751545906067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2538594901561737, "epoch": 6.37, "learning_rate": 4.480135249366019e-06, "loss": 0.3664, "step": 7539, "task_loss": 0.9515554308891296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27855831384658813, "epoch": 6.37, "learning_rate": 4.474097331240189e-06, "loss": 0.3057, "step": 7540, "task_loss": 0.2079780548810959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1433122158050537, "epoch": 6.37, "learning_rate": 4.4680594131143584e-06, "loss": 0.3201, "step": 7541, "task_loss": 0.18658651411533356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15086452662944794, "epoch": 6.38, "learning_rate": 4.462021494988528e-06, "loss": 0.2913, "step": 7542, "task_loss": 0.309598445892334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21658927202224731, "epoch": 6.38, "learning_rate": 4.455983576862698e-06, "loss": 0.2847, "step": 7543, "task_loss": 0.6333129405975342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35768622159957886, "epoch": 6.38, "learning_rate": 4.449945658736868e-06, "loss": 0.347, "step": 7544, "task_loss": 1.2652344703674316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25624990463256836, "epoch": 6.38, "learning_rate": 4.443907740611037e-06, "loss": 0.2883, "step": 7545, "task_loss": 0.8405542969703674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15250089764595032, "epoch": 6.38, "learning_rate": 4.437869822485207e-06, "loss": 0.2319, "step": 7546, "task_loss": 0.17192737758159637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23048274219036102, "epoch": 6.38, "learning_rate": 4.431831904359377e-06, "loss": 0.2994, "step": 7547, "task_loss": 1.0219038724899292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3848186433315277, "epoch": 6.38, "learning_rate": 4.425793986233547e-06, "loss": 0.3149, "step": 7548, "task_loss": 0.6340187788009644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42621853947639465, "epoch": 6.38, "learning_rate": 4.419756068107716e-06, "loss": 0.3298, "step": 7549, "task_loss": 0.13342466950416565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19078075885772705, "epoch": 6.38, "learning_rate": 4.413718149981887e-06, "loss": 0.2292, "step": 7550, "task_loss": 0.20141394436359406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4417015314102173, "epoch": 6.38, "learning_rate": 4.407680231856056e-06, "loss": 0.3692, "step": 7551, "task_loss": 1.303186058998108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32831189036369324, "epoch": 6.38, "learning_rate": 4.401642313730226e-06, "loss": 0.2775, "step": 7552, "task_loss": 0.3161212205886841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3008832335472107, "epoch": 6.38, "learning_rate": 4.395604395604396e-06, "loss": 0.2567, "step": 7553, "task_loss": 1.0426872968673706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2003769427537918, "epoch": 6.39, "learning_rate": 4.389566477478566e-06, "loss": 0.3332, "step": 7554, "task_loss": 1.3586645126342773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31327491998672485, "epoch": 6.39, "learning_rate": 4.383528559352735e-06, "loss": 0.3632, "step": 7555, "task_loss": 0.5648748874664307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25967657566070557, "epoch": 6.39, "learning_rate": 4.377490641226905e-06, "loss": 0.3117, "step": 7556, "task_loss": 0.19392432272434235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3295894265174866, "epoch": 6.39, "learning_rate": 4.371452723101075e-06, "loss": 0.3489, "step": 7557, "task_loss": 0.3380758762359619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19442905485630035, "epoch": 6.39, "learning_rate": 4.365414804975245e-06, "loss": 0.289, "step": 7558, "task_loss": 0.6876479387283325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28992944955825806, "epoch": 6.39, "learning_rate": 4.359376886849414e-06, "loss": 0.3173, "step": 7559, "task_loss": 0.6975722908973694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4166426658630371, "epoch": 6.39, "learning_rate": 4.353338968723585e-06, "loss": 0.3128, "step": 7560, "task_loss": 0.2757812738418579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4619208574295044, "epoch": 6.39, "learning_rate": 4.347301050597754e-06, "loss": 0.3587, "step": 7561, "task_loss": 1.4517521858215332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4450656473636627, "epoch": 6.39, "learning_rate": 4.341263132471924e-06, "loss": 0.2856, "step": 7562, "task_loss": 0.7957935929298401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18515165150165558, "epoch": 6.39, "learning_rate": 4.335225214346093e-06, "loss": 0.3053, "step": 7563, "task_loss": 0.11659304052591324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3538583517074585, "epoch": 6.39, "learning_rate": 4.329187296220264e-06, "loss": 0.362, "step": 7564, "task_loss": 0.14574232697486877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26322171092033386, "epoch": 6.39, "learning_rate": 4.3231493780944335e-06, "loss": 0.3621, "step": 7565, "task_loss": 0.6226597428321838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3587949872016907, "epoch": 6.4, "learning_rate": 4.317111459968603e-06, "loss": 0.3923, "step": 7566, "task_loss": 0.9011669754981995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3719926178455353, "epoch": 6.4, "learning_rate": 4.311073541842773e-06, "loss": 0.2886, "step": 7567, "task_loss": 0.5172410607337952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38466593623161316, "epoch": 6.4, "learning_rate": 4.305035623716943e-06, "loss": 0.3698, "step": 7568, "task_loss": 0.8093209862709045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3381968140602112, "epoch": 6.4, "learning_rate": 4.298997705591112e-06, "loss": 0.4314, "step": 7569, "task_loss": 0.24479562044143677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14972729980945587, "epoch": 6.4, "learning_rate": 4.292959787465282e-06, "loss": 0.3314, "step": 7570, "task_loss": 0.6810972690582275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27017974853515625, "epoch": 6.4, "learning_rate": 4.286921869339452e-06, "loss": 0.3292, "step": 7571, "task_loss": 0.04139675199985504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4216519594192505, "epoch": 6.4, "learning_rate": 4.280883951213622e-06, "loss": 0.3127, "step": 7572, "task_loss": 0.9766867756843567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23951831459999084, "epoch": 6.4, "learning_rate": 4.274846033087791e-06, "loss": 0.3392, "step": 7573, "task_loss": 0.6377594470977783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23742717504501343, "epoch": 6.4, "learning_rate": 4.268808114961961e-06, "loss": 0.3777, "step": 7574, "task_loss": 1.1173431873321533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31451931595802307, "epoch": 6.4, "learning_rate": 4.262770196836132e-06, "loss": 0.3639, "step": 7575, "task_loss": 1.125075340270996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.310459166765213, "epoch": 6.4, "learning_rate": 4.2567322787103005e-06, "loss": 0.4045, "step": 7576, "task_loss": 0.14695501327514648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15516257286071777, "epoch": 6.4, "learning_rate": 4.250694360584471e-06, "loss": 0.2457, "step": 7577, "task_loss": 0.046469125896692276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1800241768360138, "epoch": 6.41, "learning_rate": 4.24465644245864e-06, "loss": 0.2623, "step": 7578, "task_loss": 0.30151429772377014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2565627992153168, "epoch": 6.41, "learning_rate": 4.2386185243328105e-06, "loss": 0.3289, "step": 7579, "task_loss": 0.7676568627357483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4208149313926697, "epoch": 6.41, "learning_rate": 4.23258060620698e-06, "loss": 0.3283, "step": 7580, "task_loss": 0.2441713660955429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3327839970588684, "epoch": 6.41, "learning_rate": 4.22654268808115e-06, "loss": 0.3034, "step": 7581, "task_loss": 0.6181824207305908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26663511991500854, "epoch": 6.41, "learning_rate": 4.22050476995532e-06, "loss": 0.2377, "step": 7582, "task_loss": 0.14971400797367096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3141137361526489, "epoch": 6.41, "learning_rate": 4.214466851829489e-06, "loss": 0.3315, "step": 7583, "task_loss": 0.7046247720718384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.13665702939033508, "epoch": 6.41, "learning_rate": 4.208428933703659e-06, "loss": 0.3216, "step": 7584, "task_loss": 0.3702460527420044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14996786415576935, "epoch": 6.41, "learning_rate": 4.20239101557783e-06, "loss": 0.2559, "step": 7585, "task_loss": 0.5115115642547607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44514206051826477, "epoch": 6.41, "learning_rate": 4.1963530974519986e-06, "loss": 0.3442, "step": 7586, "task_loss": 1.189767837524414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3971271812915802, "epoch": 6.41, "learning_rate": 4.190315179326169e-06, "loss": 0.3985, "step": 7587, "task_loss": 1.0831173658370972 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20305129885673523, "epoch": 6.41, "learning_rate": 4.184277261200338e-06, "loss": 0.2547, "step": 7588, "task_loss": 0.4932834804058075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3805791139602661, "epoch": 6.41, "learning_rate": 4.178239343074508e-06, "loss": 0.3964, "step": 7589, "task_loss": 1.1334534883499146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35125017166137695, "epoch": 6.42, "learning_rate": 4.172201424948678e-06, "loss": 0.2582, "step": 7590, "task_loss": 0.2632133960723877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3267618417739868, "epoch": 6.42, "learning_rate": 4.166163506822847e-06, "loss": 0.3339, "step": 7591, "task_loss": 0.699101448059082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18370021879673004, "epoch": 6.42, "learning_rate": 4.160125588697018e-06, "loss": 0.3501, "step": 7592, "task_loss": 0.28390124440193176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35890787839889526, "epoch": 6.42, "learning_rate": 4.154087670571187e-06, "loss": 0.286, "step": 7593, "task_loss": 1.2647292613983154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3544279932975769, "epoch": 6.42, "learning_rate": 4.148049752445357e-06, "loss": 0.347, "step": 7594, "task_loss": 0.23972183465957642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26000529527664185, "epoch": 6.42, "learning_rate": 4.142011834319527e-06, "loss": 0.236, "step": 7595, "task_loss": 0.5440828204154968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.178984135389328, "epoch": 6.42, "learning_rate": 4.135973916193697e-06, "loss": 0.2532, "step": 7596, "task_loss": 0.33593595027923584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23022103309631348, "epoch": 6.42, "learning_rate": 4.129935998067866e-06, "loss": 0.2462, "step": 7597, "task_loss": 0.2178191840648651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25842761993408203, "epoch": 6.42, "learning_rate": 4.123898079942036e-06, "loss": 0.2868, "step": 7598, "task_loss": 0.41043075919151306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37892740964889526, "epoch": 6.42, "learning_rate": 4.117860161816206e-06, "loss": 0.3122, "step": 7599, "task_loss": 1.347780704498291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4189637303352356, "epoch": 6.42, "learning_rate": 4.1118222436903755e-06, "loss": 0.2924, "step": 7600, "task_loss": 0.5836082100868225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31707924604415894, "epoch": 6.42, "learning_rate": 4.105784325564545e-06, "loss": 0.2107, "step": 7601, "task_loss": 0.8886826038360596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19576415419578552, "epoch": 6.43, "learning_rate": 4.099746407438716e-06, "loss": 0.1693, "step": 7602, "task_loss": 0.2022216022014618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.300383597612381, "epoch": 6.43, "learning_rate": 4.093708489312885e-06, "loss": 0.3881, "step": 7603, "task_loss": 0.3401297628879547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14522093534469604, "epoch": 6.43, "learning_rate": 4.087670571187055e-06, "loss": 0.2774, "step": 7604, "task_loss": 0.5877777934074402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24746860563755035, "epoch": 6.43, "learning_rate": 4.081632653061224e-06, "loss": 0.3158, "step": 7605, "task_loss": 0.07596085220575333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14857080578804016, "epoch": 6.43, "learning_rate": 4.075594734935395e-06, "loss": 0.2343, "step": 7606, "task_loss": 0.48237183690071106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27401381731033325, "epoch": 6.43, "learning_rate": 4.0695568168095645e-06, "loss": 0.2835, "step": 7607, "task_loss": 0.4748966097831726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19752860069274902, "epoch": 6.43, "learning_rate": 4.063518898683734e-06, "loss": 0.3137, "step": 7608, "task_loss": 0.9712822437286377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21586987376213074, "epoch": 6.43, "learning_rate": 4.057480980557904e-06, "loss": 0.3219, "step": 7609, "task_loss": 1.0615363121032715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2788478434085846, "epoch": 6.43, "learning_rate": 4.051443062432074e-06, "loss": 0.2448, "step": 7610, "task_loss": 0.6002561450004578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22147850692272186, "epoch": 6.43, "learning_rate": 4.045405144306243e-06, "loss": 0.3392, "step": 7611, "task_loss": 0.5436692237854004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17395344376564026, "epoch": 6.43, "learning_rate": 4.039367226180413e-06, "loss": 0.2595, "step": 7612, "task_loss": 0.4793539047241211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2913029193878174, "epoch": 6.44, "learning_rate": 4.033329308054583e-06, "loss": 0.3166, "step": 7613, "task_loss": 0.20283293724060059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33283916115760803, "epoch": 6.44, "learning_rate": 4.0272913899287525e-06, "loss": 0.3449, "step": 7614, "task_loss": 1.2942601442337036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3849818706512451, "epoch": 6.44, "learning_rate": 4.021253471802922e-06, "loss": 0.3954, "step": 7615, "task_loss": 0.47298741340637207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20927035808563232, "epoch": 6.44, "learning_rate": 4.015215553677092e-06, "loss": 0.3519, "step": 7616, "task_loss": 1.0701066255569458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3142118453979492, "epoch": 6.44, "learning_rate": 4.0091776355512625e-06, "loss": 0.2993, "step": 7617, "task_loss": 0.20362406969070435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.303452730178833, "epoch": 6.44, "learning_rate": 4.0031397174254314e-06, "loss": 0.3195, "step": 7618, "task_loss": 0.8582145571708679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20560412108898163, "epoch": 6.44, "learning_rate": 3.997101799299602e-06, "loss": 0.2832, "step": 7619, "task_loss": 1.0934340953826904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2720237970352173, "epoch": 6.44, "learning_rate": 3.991063881173771e-06, "loss": 0.2869, "step": 7620, "task_loss": 0.7463276982307434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21127812564373016, "epoch": 6.44, "learning_rate": 3.9850259630479414e-06, "loss": 0.277, "step": 7621, "task_loss": 0.059178370982408524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.288361132144928, "epoch": 6.44, "learning_rate": 3.978988044922111e-06, "loss": 0.2979, "step": 7622, "task_loss": 0.6369981169700623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27724629640579224, "epoch": 6.44, "learning_rate": 3.972950126796281e-06, "loss": 0.2532, "step": 7623, "task_loss": 0.6088528037071228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31978651881217957, "epoch": 6.44, "learning_rate": 3.966912208670451e-06, "loss": 0.2846, "step": 7624, "task_loss": 0.6300305128097534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2228725254535675, "epoch": 6.45, "learning_rate": 3.96087429054462e-06, "loss": 0.3113, "step": 7625, "task_loss": 0.36247718334198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.259646475315094, "epoch": 6.45, "learning_rate": 3.95483637241879e-06, "loss": 0.255, "step": 7626, "task_loss": 0.20775899291038513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22382968664169312, "epoch": 6.45, "learning_rate": 3.948798454292961e-06, "loss": 0.3139, "step": 7627, "task_loss": 0.6190739274024963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27425381541252136, "epoch": 6.45, "learning_rate": 3.9427605361671295e-06, "loss": 0.3283, "step": 7628, "task_loss": 0.4474986493587494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26254940032958984, "epoch": 6.45, "learning_rate": 3.9367226180413e-06, "loss": 0.2732, "step": 7629, "task_loss": 0.5793317556381226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3396044969558716, "epoch": 6.45, "learning_rate": 3.930684699915469e-06, "loss": 0.3183, "step": 7630, "task_loss": 0.7887149453163147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33999815583229065, "epoch": 6.45, "learning_rate": 3.9246467817896395e-06, "loss": 0.3661, "step": 7631, "task_loss": 0.06450016796588898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42943137884140015, "epoch": 6.45, "learning_rate": 3.918608863663809e-06, "loss": 0.4371, "step": 7632, "task_loss": 1.6828551292419434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3921581506729126, "epoch": 6.45, "learning_rate": 3.912570945537979e-06, "loss": 0.4142, "step": 7633, "task_loss": 0.36483824253082275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19788485765457153, "epoch": 6.45, "learning_rate": 3.906533027412149e-06, "loss": 0.2245, "step": 7634, "task_loss": 0.8897773623466492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.282412052154541, "epoch": 6.45, "learning_rate": 3.9004951092863184e-06, "loss": 0.3128, "step": 7635, "task_loss": 0.7074922323226929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19818323850631714, "epoch": 6.45, "learning_rate": 3.894457191160488e-06, "loss": 0.2709, "step": 7636, "task_loss": 0.1555086374282837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14269568026065826, "epoch": 6.46, "learning_rate": 3.888419273034658e-06, "loss": 0.3359, "step": 7637, "task_loss": 0.41224798560142517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2806837558746338, "epoch": 6.46, "learning_rate": 3.882381354908828e-06, "loss": 0.2787, "step": 7638, "task_loss": 1.3711081743240356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24281884729862213, "epoch": 6.46, "learning_rate": 3.876343436782997e-06, "loss": 0.4352, "step": 7639, "task_loss": 0.5242077112197876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29914671182632446, "epoch": 6.46, "learning_rate": 3.870305518657167e-06, "loss": 0.2331, "step": 7640, "task_loss": 0.32930976152420044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22361668944358826, "epoch": 6.46, "learning_rate": 3.864267600531337e-06, "loss": 0.3022, "step": 7641, "task_loss": 0.41104990243911743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.233873188495636, "epoch": 6.46, "learning_rate": 3.858229682405507e-06, "loss": 0.2568, "step": 7642, "task_loss": 0.331900030374527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3556651473045349, "epoch": 6.46, "learning_rate": 3.852191764279676e-06, "loss": 0.3105, "step": 7643, "task_loss": 1.2825204133987427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3864246606826782, "epoch": 6.46, "learning_rate": 3.846153846153847e-06, "loss": 0.32, "step": 7644, "task_loss": 0.13983586430549622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20786814391613007, "epoch": 6.46, "learning_rate": 3.840115928028016e-06, "loss": 0.2568, "step": 7645, "task_loss": 0.42242494225502014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3284645676612854, "epoch": 6.46, "learning_rate": 3.834078009902186e-06, "loss": 0.3514, "step": 7646, "task_loss": 0.3248153626918793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2723766565322876, "epoch": 6.46, "learning_rate": 3.828040091776355e-06, "loss": 0.243, "step": 7647, "task_loss": 0.17148272693157196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2803103029727936, "epoch": 6.46, "learning_rate": 3.822002173650526e-06, "loss": 0.2593, "step": 7648, "task_loss": 1.4269896745681763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23031777143478394, "epoch": 6.47, "learning_rate": 3.815964255524695e-06, "loss": 0.22, "step": 7649, "task_loss": 0.23004360496997833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3232995867729187, "epoch": 6.47, "learning_rate": 3.8099263373988647e-06, "loss": 0.3338, "step": 7650, "task_loss": 0.13335521519184113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28755271434783936, "epoch": 6.47, "learning_rate": 3.803888419273035e-06, "loss": 0.3683, "step": 7651, "task_loss": 0.5924611687660217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18796807527542114, "epoch": 6.47, "learning_rate": 3.797850501147204e-06, "loss": 0.3302, "step": 7652, "task_loss": 0.7696681618690491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.316429078578949, "epoch": 6.47, "learning_rate": 3.7918125830213743e-06, "loss": 0.3219, "step": 7653, "task_loss": 0.29527080059051514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2695137858390808, "epoch": 6.47, "learning_rate": 3.7857746648955445e-06, "loss": 0.3621, "step": 7654, "task_loss": 0.25421684980392456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22983390092849731, "epoch": 6.47, "learning_rate": 3.7797367467697138e-06, "loss": 0.221, "step": 7655, "task_loss": 0.2566205859184265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39267033338546753, "epoch": 6.47, "learning_rate": 3.773698828643884e-06, "loss": 0.3233, "step": 7656, "task_loss": 0.5737228989601135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.532379150390625, "epoch": 6.47, "learning_rate": 3.767660910518053e-06, "loss": 0.3265, "step": 7657, "task_loss": 0.1918427050113678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2840079665184021, "epoch": 6.47, "learning_rate": 3.7616229923922234e-06, "loss": 0.3287, "step": 7658, "task_loss": 0.9820913076400757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2744162976741791, "epoch": 6.47, "learning_rate": 3.7555850742663935e-06, "loss": 0.3125, "step": 7659, "task_loss": 0.1550896018743515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2549930214881897, "epoch": 6.47, "learning_rate": 3.749547156140563e-06, "loss": 0.2543, "step": 7660, "task_loss": 0.13956153392791748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1189311295747757, "epoch": 6.48, "learning_rate": 3.743509238014733e-06, "loss": 0.2471, "step": 7661, "task_loss": 0.4829791188240051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1797708421945572, "epoch": 6.48, "learning_rate": 3.7374713198889023e-06, "loss": 0.3485, "step": 7662, "task_loss": 0.7480167746543884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2164568156003952, "epoch": 6.48, "learning_rate": 3.7314334017630724e-06, "loss": 0.3245, "step": 7663, "task_loss": 0.07709074020385742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33491480350494385, "epoch": 6.48, "learning_rate": 3.725395483637242e-06, "loss": 0.4589, "step": 7664, "task_loss": 0.8044095039367676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3397635817527771, "epoch": 6.48, "learning_rate": 3.719357565511412e-06, "loss": 0.3066, "step": 7665, "task_loss": 1.7679390907287598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3863993287086487, "epoch": 6.48, "learning_rate": 3.7133196473855816e-06, "loss": 0.3758, "step": 7666, "task_loss": 0.5524356365203857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2886066436767578, "epoch": 6.48, "learning_rate": 3.7072817292597513e-06, "loss": 0.2586, "step": 7667, "task_loss": 0.5673776268959045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4014289379119873, "epoch": 6.48, "learning_rate": 3.701243811133921e-06, "loss": 0.2849, "step": 7668, "task_loss": 0.6463268995285034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1997617930173874, "epoch": 6.48, "learning_rate": 3.695205893008091e-06, "loss": 0.2219, "step": 7669, "task_loss": 0.3589445650577545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2855781018733978, "epoch": 6.48, "learning_rate": 3.6891679748822605e-06, "loss": 0.3078, "step": 7670, "task_loss": 1.0870369672775269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2754482328891754, "epoch": 6.48, "learning_rate": 3.6831300567564306e-06, "loss": 0.332, "step": 7671, "task_loss": 0.6770545244216919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34514573216438293, "epoch": 6.48, "learning_rate": 3.6770921386306e-06, "loss": 0.3212, "step": 7672, "task_loss": 0.37196940183639526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1223529577255249, "epoch": 6.49, "learning_rate": 3.67105422050477e-06, "loss": 0.2357, "step": 7673, "task_loss": 0.3342821002006531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3233817219734192, "epoch": 6.49, "learning_rate": 3.66501630237894e-06, "loss": 0.2964, "step": 7674, "task_loss": 0.5090448260307312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24306508898735046, "epoch": 6.49, "learning_rate": 3.6589783842531095e-06, "loss": 0.2658, "step": 7675, "task_loss": 0.1127779558300972 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3854804039001465, "epoch": 6.49, "learning_rate": 3.6529404661272797e-06, "loss": 0.2991, "step": 7676, "task_loss": 0.4436812102794647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29993385076522827, "epoch": 6.49, "learning_rate": 3.646902548001449e-06, "loss": 0.2949, "step": 7677, "task_loss": 0.06425652652978897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2756175398826599, "epoch": 6.49, "learning_rate": 3.640864629875619e-06, "loss": 0.3209, "step": 7678, "task_loss": 0.5114643573760986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4125553071498871, "epoch": 6.49, "learning_rate": 3.6348267117497893e-06, "loss": 0.3582, "step": 7679, "task_loss": 0.4119686484336853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26308587193489075, "epoch": 6.49, "learning_rate": 3.6287887936239586e-06, "loss": 0.4307, "step": 7680, "task_loss": 1.1664143800735474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23794469237327576, "epoch": 6.49, "learning_rate": 3.6227508754981287e-06, "loss": 0.2265, "step": 7681, "task_loss": 0.1225070208311081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25656676292419434, "epoch": 6.49, "learning_rate": 3.616712957372298e-06, "loss": 0.3423, "step": 7682, "task_loss": 0.12543822824954987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1462290734052658, "epoch": 6.49, "learning_rate": 3.610675039246468e-06, "loss": 0.3115, "step": 7683, "task_loss": 0.008601074106991291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1817118227481842, "epoch": 6.5, "learning_rate": 3.6046371211206383e-06, "loss": 0.3537, "step": 7684, "task_loss": 0.3671954572200775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26163482666015625, "epoch": 6.5, "learning_rate": 3.5985992029948076e-06, "loss": 0.2771, "step": 7685, "task_loss": 0.9067686796188354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2924979031085968, "epoch": 6.5, "learning_rate": 3.5925612848689777e-06, "loss": 0.293, "step": 7686, "task_loss": 0.5963922739028931 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4087796211242676, "epoch": 6.5, "learning_rate": 3.586523366743147e-06, "loss": 0.2841, "step": 7687, "task_loss": 1.0325989723205566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32888954877853394, "epoch": 6.5, "learning_rate": 3.580485448617317e-06, "loss": 0.2884, "step": 7688, "task_loss": 0.5887385010719299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16646787524223328, "epoch": 6.5, "learning_rate": 3.5744475304914865e-06, "loss": 0.2253, "step": 7689, "task_loss": 0.6122108101844788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25914356112480164, "epoch": 6.5, "learning_rate": 3.5684096123656566e-06, "loss": 0.2599, "step": 7690, "task_loss": 1.3474130630493164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3753446042537689, "epoch": 6.5, "learning_rate": 3.5623716942398264e-06, "loss": 0.3567, "step": 7691, "task_loss": 1.1890486478805542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20404039323329926, "epoch": 6.5, "learning_rate": 3.5563337761139957e-06, "loss": 0.3204, "step": 7692, "task_loss": 0.1062454879283905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26682597398757935, "epoch": 6.5, "learning_rate": 3.550295857988166e-06, "loss": 0.3833, "step": 7693, "task_loss": 0.3857364058494568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.282572478055954, "epoch": 6.5, "learning_rate": 3.544257939862335e-06, "loss": 0.2556, "step": 7694, "task_loss": 0.43736332654953003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3246682584285736, "epoch": 6.5, "learning_rate": 3.5382200217365053e-06, "loss": 0.3231, "step": 7695, "task_loss": 0.3831939101219177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36566394567489624, "epoch": 6.51, "learning_rate": 3.5321821036106754e-06, "loss": 0.3678, "step": 7696, "task_loss": 0.8265041708946228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28173714876174927, "epoch": 6.51, "learning_rate": 3.5261441854848447e-06, "loss": 0.3068, "step": 7697, "task_loss": 1.109744906425476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3975597321987152, "epoch": 6.51, "learning_rate": 3.520106267359015e-06, "loss": 0.2841, "step": 7698, "task_loss": 0.9667664766311646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23021508753299713, "epoch": 6.51, "learning_rate": 3.514068349233184e-06, "loss": 0.3338, "step": 7699, "task_loss": 0.2428341507911682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3318876028060913, "epoch": 6.51, "learning_rate": 3.5080304311073543e-06, "loss": 0.2687, "step": 7700, "task_loss": 0.5448859333992004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2753526568412781, "epoch": 6.51, "learning_rate": 3.5019925129815245e-06, "loss": 0.3519, "step": 7701, "task_loss": 0.12021508812904358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21225327253341675, "epoch": 6.51, "learning_rate": 3.4959545948556938e-06, "loss": 0.346, "step": 7702, "task_loss": 0.35867640376091003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18465828895568848, "epoch": 6.51, "learning_rate": 3.489916676729864e-06, "loss": 0.2752, "step": 7703, "task_loss": 0.5042694807052612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22780777513980865, "epoch": 6.51, "learning_rate": 3.483878758604033e-06, "loss": 0.3525, "step": 7704, "task_loss": 0.5017569661140442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2332690954208374, "epoch": 6.51, "learning_rate": 3.4778408404782034e-06, "loss": 0.319, "step": 7705, "task_loss": 0.6484036445617676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25557589530944824, "epoch": 6.51, "learning_rate": 3.4718029223523735e-06, "loss": 0.2707, "step": 7706, "task_loss": 0.397205114364624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3088238835334778, "epoch": 6.51, "learning_rate": 3.465765004226543e-06, "loss": 0.2653, "step": 7707, "task_loss": 0.7470111846923828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28643590211868286, "epoch": 6.52, "learning_rate": 3.459727086100713e-06, "loss": 0.2644, "step": 7708, "task_loss": 1.1843187808990479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32370615005493164, "epoch": 6.52, "learning_rate": 3.4536891679748822e-06, "loss": 0.351, "step": 7709, "task_loss": 0.5305542945861816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4155447483062744, "epoch": 6.52, "learning_rate": 3.4476512498490524e-06, "loss": 0.3729, "step": 7710, "task_loss": 0.7248915433883667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33226478099823, "epoch": 6.52, "learning_rate": 3.441613331723222e-06, "loss": 0.3447, "step": 7711, "task_loss": 0.17352278530597687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18187671899795532, "epoch": 6.52, "learning_rate": 3.435575413597392e-06, "loss": 0.3033, "step": 7712, "task_loss": 0.9112693071365356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2823968529701233, "epoch": 6.52, "learning_rate": 3.4295374954715616e-06, "loss": 0.384, "step": 7713, "task_loss": 0.43994268774986267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3167378306388855, "epoch": 6.52, "learning_rate": 3.4234995773457313e-06, "loss": 0.3286, "step": 7714, "task_loss": 0.5456465482711792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2256161868572235, "epoch": 6.52, "learning_rate": 3.417461659219901e-06, "loss": 0.2984, "step": 7715, "task_loss": 0.7288791537284851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24421262741088867, "epoch": 6.52, "learning_rate": 3.411423741094071e-06, "loss": 0.2789, "step": 7716, "task_loss": 0.2254870980978012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5115000009536743, "epoch": 6.52, "learning_rate": 3.4053858229682405e-06, "loss": 0.3563, "step": 7717, "task_loss": 0.8104994297027588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29523026943206787, "epoch": 6.52, "learning_rate": 3.3993479048424106e-06, "loss": 0.3453, "step": 7718, "task_loss": 0.44565337896347046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2543001174926758, "epoch": 6.52, "learning_rate": 3.39330998671658e-06, "loss": 0.3419, "step": 7719, "task_loss": 0.6663616299629211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23685777187347412, "epoch": 6.53, "learning_rate": 3.38727206859075e-06, "loss": 0.3189, "step": 7720, "task_loss": 0.8160300254821777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40775996446609497, "epoch": 6.53, "learning_rate": 3.38123415046492e-06, "loss": 0.3182, "step": 7721, "task_loss": 0.3239287734031677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4789702892303467, "epoch": 6.53, "learning_rate": 3.3751962323390895e-06, "loss": 0.4027, "step": 7722, "task_loss": 0.8513980507850647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17842161655426025, "epoch": 6.53, "learning_rate": 3.3691583142132597e-06, "loss": 0.2812, "step": 7723, "task_loss": 0.1963832676410675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.256125271320343, "epoch": 6.53, "learning_rate": 3.363120396087429e-06, "loss": 0.3678, "step": 7724, "task_loss": 0.28670963644981384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2568109631538391, "epoch": 6.53, "learning_rate": 3.357082477961599e-06, "loss": 0.2995, "step": 7725, "task_loss": 0.9475805163383484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28590115904808044, "epoch": 6.53, "learning_rate": 3.3510445598357693e-06, "loss": 0.3601, "step": 7726, "task_loss": 0.7215396165847778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2433110922574997, "epoch": 6.53, "learning_rate": 3.3450066417099386e-06, "loss": 0.2535, "step": 7727, "task_loss": 0.08296217024326324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2580510973930359, "epoch": 6.53, "learning_rate": 3.3389687235841087e-06, "loss": 0.2551, "step": 7728, "task_loss": 0.03426100313663483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3052981197834015, "epoch": 6.53, "learning_rate": 3.332930805458278e-06, "loss": 0.3119, "step": 7729, "task_loss": 0.4526112675666809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21933098137378693, "epoch": 6.53, "learning_rate": 3.326892887332448e-06, "loss": 0.2405, "step": 7730, "task_loss": 0.4448099732398987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2532702088356018, "epoch": 6.53, "learning_rate": 3.3208549692066174e-06, "loss": 0.3008, "step": 7731, "task_loss": 0.2957974672317505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2416529357433319, "epoch": 6.54, "learning_rate": 3.3148170510807876e-06, "loss": 0.2724, "step": 7732, "task_loss": 1.0096704959869385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32909345626831055, "epoch": 6.54, "learning_rate": 3.3087791329549573e-06, "loss": 0.2943, "step": 7733, "task_loss": 0.6797402501106262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31137615442276, "epoch": 6.54, "learning_rate": 3.302741214829127e-06, "loss": 0.2681, "step": 7734, "task_loss": 0.44247618317604065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29257434606552124, "epoch": 6.54, "learning_rate": 3.2967032967032968e-06, "loss": 0.3398, "step": 7735, "task_loss": 0.45513013005256653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.241083562374115, "epoch": 6.54, "learning_rate": 3.2906653785774665e-06, "loss": 0.283, "step": 7736, "task_loss": 1.1103302240371704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5558875799179077, "epoch": 6.54, "learning_rate": 3.2846274604516362e-06, "loss": 0.3886, "step": 7737, "task_loss": 1.2206217050552368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.269656240940094, "epoch": 6.54, "learning_rate": 3.2785895423258064e-06, "loss": 0.2954, "step": 7738, "task_loss": 0.5726799368858337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29110953211784363, "epoch": 6.54, "learning_rate": 3.2725516241999757e-06, "loss": 0.2795, "step": 7739, "task_loss": 0.3117235600948334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15816456079483032, "epoch": 6.54, "learning_rate": 3.266513706074146e-06, "loss": 0.2681, "step": 7740, "task_loss": 0.3725349009037018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20428690314292908, "epoch": 6.54, "learning_rate": 3.260475787948315e-06, "loss": 0.2371, "step": 7741, "task_loss": 0.5562343001365662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16826528310775757, "epoch": 6.54, "learning_rate": 3.2544378698224853e-06, "loss": 0.2374, "step": 7742, "task_loss": 0.06252474337816238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22145065665245056, "epoch": 6.54, "learning_rate": 3.2483999516966554e-06, "loss": 0.2459, "step": 7743, "task_loss": 0.31726622581481934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31375652551651, "epoch": 6.55, "learning_rate": 3.2423620335708247e-06, "loss": 0.3182, "step": 7744, "task_loss": 0.5596864819526672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2410619556903839, "epoch": 6.55, "learning_rate": 3.236324115444995e-06, "loss": 0.3414, "step": 7745, "task_loss": 0.518237292766571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22634145617485046, "epoch": 6.55, "learning_rate": 3.230286197319164e-06, "loss": 0.2746, "step": 7746, "task_loss": 0.24403728544712067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6215360164642334, "epoch": 6.55, "learning_rate": 3.2242482791933343e-06, "loss": 0.3699, "step": 7747, "task_loss": 0.704669713973999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1534210741519928, "epoch": 6.55, "learning_rate": 3.2182103610675045e-06, "loss": 0.2341, "step": 7748, "task_loss": 0.020347775891423225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34189504384994507, "epoch": 6.55, "learning_rate": 3.2121724429416738e-06, "loss": 0.4352, "step": 7749, "task_loss": 0.8975735902786255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28088268637657166, "epoch": 6.55, "learning_rate": 3.206134524815844e-06, "loss": 0.2953, "step": 7750, "task_loss": 0.635474681854248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2819012403488159, "epoch": 6.55, "learning_rate": 3.200096606690013e-06, "loss": 0.3413, "step": 7751, "task_loss": 0.27518609166145325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1857130378484726, "epoch": 6.55, "learning_rate": 3.1940586885641833e-06, "loss": 0.3681, "step": 7752, "task_loss": 0.8708760738372803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4891083240509033, "epoch": 6.55, "learning_rate": 3.1880207704383535e-06, "loss": 0.3612, "step": 7753, "task_loss": 0.5386810302734375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3239248991012573, "epoch": 6.55, "learning_rate": 3.181982852312523e-06, "loss": 0.385, "step": 7754, "task_loss": 1.592814326286316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.597692608833313, "epoch": 6.56, "learning_rate": 3.1759449341866925e-06, "loss": 0.4023, "step": 7755, "task_loss": 0.9571277499198914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2451781928539276, "epoch": 6.56, "learning_rate": 3.1699070160608622e-06, "loss": 0.2442, "step": 7756, "task_loss": 0.5085301995277405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20908300578594208, "epoch": 6.56, "learning_rate": 3.163869097935032e-06, "loss": 0.2688, "step": 7757, "task_loss": 0.46432510018348694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41582638025283813, "epoch": 6.56, "learning_rate": 3.157831179809202e-06, "loss": 0.3452, "step": 7758, "task_loss": 0.9008634090423584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20233100652694702, "epoch": 6.56, "learning_rate": 3.1517932616833714e-06, "loss": 0.2613, "step": 7759, "task_loss": 0.45042917132377625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23195208609104156, "epoch": 6.56, "learning_rate": 3.1457553435575416e-06, "loss": 0.3453, "step": 7760, "task_loss": 0.2590802013874054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4356086254119873, "epoch": 6.56, "learning_rate": 3.139717425431711e-06, "loss": 0.3223, "step": 7761, "task_loss": 0.6922436952590942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18309235572814941, "epoch": 6.56, "learning_rate": 3.133679507305881e-06, "loss": 0.2601, "step": 7762, "task_loss": 0.3537808060646057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41175904870033264, "epoch": 6.56, "learning_rate": 3.127641589180051e-06, "loss": 0.2964, "step": 7763, "task_loss": 0.6723619103431702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3786485195159912, "epoch": 6.56, "learning_rate": 3.1216036710542205e-06, "loss": 0.2808, "step": 7764, "task_loss": 0.7922626733779907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18051612377166748, "epoch": 6.56, "learning_rate": 3.11556575292839e-06, "loss": 0.2925, "step": 7765, "task_loss": 0.7401147484779358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24153660237789154, "epoch": 6.56, "learning_rate": 3.1095278348025603e-06, "loss": 0.2945, "step": 7766, "task_loss": 0.33426788449287415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20641401410102844, "epoch": 6.57, "learning_rate": 3.10348991667673e-06, "loss": 0.292, "step": 7767, "task_loss": 0.271756112575531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16532520949840546, "epoch": 6.57, "learning_rate": 3.0974519985508998e-06, "loss": 0.3312, "step": 7768, "task_loss": 0.647797703742981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.05973302200436592, "epoch": 6.57, "learning_rate": 3.0914140804250695e-06, "loss": 0.1923, "step": 7769, "task_loss": 0.01611517369747162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3352602422237396, "epoch": 6.57, "learning_rate": 3.0853761622992392e-06, "loss": 0.3434, "step": 7770, "task_loss": 0.26947396993637085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39845141768455505, "epoch": 6.57, "learning_rate": 3.0793382441734094e-06, "loss": 0.3454, "step": 7771, "task_loss": 0.8607264757156372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23900604248046875, "epoch": 6.57, "learning_rate": 3.073300326047579e-06, "loss": 0.2208, "step": 7772, "task_loss": 0.3705090582370758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3910104036331177, "epoch": 6.57, "learning_rate": 3.067262407921749e-06, "loss": 0.3844, "step": 7773, "task_loss": 1.2109107971191406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3214458227157593, "epoch": 6.57, "learning_rate": 3.0612244897959185e-06, "loss": 0.4273, "step": 7774, "task_loss": 0.5226218104362488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3894149363040924, "epoch": 6.57, "learning_rate": 3.0551865716700883e-06, "loss": 0.2542, "step": 7775, "task_loss": 0.36270880699157715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32939770817756653, "epoch": 6.57, "learning_rate": 3.0491486535442584e-06, "loss": 0.2926, "step": 7776, "task_loss": 0.8244893550872803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2818754017353058, "epoch": 6.57, "learning_rate": 3.043110735418428e-06, "loss": 0.2604, "step": 7777, "task_loss": 0.4704045057296753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2879902124404907, "epoch": 6.57, "learning_rate": 3.037072817292598e-06, "loss": 0.2722, "step": 7778, "task_loss": 0.8031188249588013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31563055515289307, "epoch": 6.58, "learning_rate": 3.0310348991667676e-06, "loss": 0.3829, "step": 7779, "task_loss": 0.8495994210243225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31782063841819763, "epoch": 6.58, "learning_rate": 3.0249969810409373e-06, "loss": 0.291, "step": 7780, "task_loss": 0.7442578673362732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3889234662055969, "epoch": 6.58, "learning_rate": 3.018959062915107e-06, "loss": 0.3592, "step": 7781, "task_loss": 0.8456394672393799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20989277958869934, "epoch": 6.58, "learning_rate": 3.0129211447892768e-06, "loss": 0.2937, "step": 7782, "task_loss": 0.07926666736602783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36636269092559814, "epoch": 6.58, "learning_rate": 3.0068832266634465e-06, "loss": 0.3376, "step": 7783, "task_loss": 1.0724951028823853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31912216544151306, "epoch": 6.58, "learning_rate": 3.0008453085376162e-06, "loss": 0.3704, "step": 7784, "task_loss": 0.8201545476913452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.12486156076192856, "epoch": 6.58, "learning_rate": 2.994807390411786e-06, "loss": 0.2348, "step": 7785, "task_loss": 0.2843942940235138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2780429720878601, "epoch": 6.58, "learning_rate": 2.9887694722859557e-06, "loss": 0.3756, "step": 7786, "task_loss": 0.9842714071273804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5913220643997192, "epoch": 6.58, "learning_rate": 2.982731554160126e-06, "loss": 0.3304, "step": 7787, "task_loss": 0.9115258455276489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29290324449539185, "epoch": 6.58, "learning_rate": 2.9766936360342955e-06, "loss": 0.2461, "step": 7788, "task_loss": 0.53319251537323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2740883231163025, "epoch": 6.58, "learning_rate": 2.9706557179084653e-06, "loss": 0.221, "step": 7789, "task_loss": 0.34767764806747437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24867141246795654, "epoch": 6.58, "learning_rate": 2.964617799782635e-06, "loss": 0.312, "step": 7790, "task_loss": 0.15427851676940918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.35512495040893555, "epoch": 6.59, "learning_rate": 2.9585798816568047e-06, "loss": 0.3417, "step": 7791, "task_loss": 0.6529455184936523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5198819637298584, "epoch": 6.59, "learning_rate": 2.952541963530975e-06, "loss": 0.3551, "step": 7792, "task_loss": 1.2599546909332275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30709370970726013, "epoch": 6.59, "learning_rate": 2.9465040454051446e-06, "loss": 0.3204, "step": 7793, "task_loss": 0.8366016149520874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5353522896766663, "epoch": 6.59, "learning_rate": 2.9404661272793143e-06, "loss": 0.4286, "step": 7794, "task_loss": 0.472912460565567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4839637577533722, "epoch": 6.59, "learning_rate": 2.934428209153484e-06, "loss": 0.4272, "step": 7795, "task_loss": 0.5716564655303955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2400052696466446, "epoch": 6.59, "learning_rate": 2.9283902910276537e-06, "loss": 0.3252, "step": 7796, "task_loss": 0.6447170972824097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18614108860492706, "epoch": 6.59, "learning_rate": 2.922352372901824e-06, "loss": 0.2453, "step": 7797, "task_loss": 0.2785165011882782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28552165627479553, "epoch": 6.59, "learning_rate": 2.9163144547759936e-06, "loss": 0.2791, "step": 7798, "task_loss": 0.4893183410167694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27799782156944275, "epoch": 6.59, "learning_rate": 2.9102765366501633e-06, "loss": 0.3598, "step": 7799, "task_loss": 0.3661113977432251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1490083634853363, "epoch": 6.59, "learning_rate": 2.904238618524333e-06, "loss": 0.3108, "step": 7800, "task_loss": 0.0720779150724411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2711118459701538, "epoch": 6.59, "learning_rate": 2.898200700398503e-06, "loss": 0.4035, "step": 7801, "task_loss": 0.1422618329524994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17437247931957245, "epoch": 6.59, "learning_rate": 2.8921627822726725e-06, "loss": 0.301, "step": 7802, "task_loss": 0.47586122155189514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3608804941177368, "epoch": 6.6, "learning_rate": 2.8861248641468422e-06, "loss": 0.3708, "step": 7803, "task_loss": 0.6960621476173401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31532007455825806, "epoch": 6.6, "learning_rate": 2.880086946021012e-06, "loss": 0.3451, "step": 7804, "task_loss": 0.6783230304718018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2638261914253235, "epoch": 6.6, "learning_rate": 2.8740490278951817e-06, "loss": 0.2718, "step": 7805, "task_loss": 0.687343180179596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26927652955055237, "epoch": 6.6, "learning_rate": 2.8680111097693514e-06, "loss": 0.357, "step": 7806, "task_loss": 0.6981947422027588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3005393445491791, "epoch": 6.6, "learning_rate": 2.8619731916435216e-06, "loss": 0.3023, "step": 7807, "task_loss": 1.2326972484588623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2538709342479706, "epoch": 6.6, "learning_rate": 2.8559352735176913e-06, "loss": 0.3224, "step": 7808, "task_loss": 0.5890560746192932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20191287994384766, "epoch": 6.6, "learning_rate": 2.849897355391861e-06, "loss": 0.2698, "step": 7809, "task_loss": 1.242549180984497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41216808557510376, "epoch": 6.6, "learning_rate": 2.8438594372660307e-06, "loss": 0.4515, "step": 7810, "task_loss": 0.20650440454483032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19454407691955566, "epoch": 6.6, "learning_rate": 2.8378215191402005e-06, "loss": 0.2658, "step": 7811, "task_loss": 0.9056940078735352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23432233929634094, "epoch": 6.6, "learning_rate": 2.83178360101437e-06, "loss": 0.3493, "step": 7812, "task_loss": 0.08802898973226547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22374801337718964, "epoch": 6.6, "learning_rate": 2.8257456828885403e-06, "loss": 0.2897, "step": 7813, "task_loss": 0.4004828929901123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3277839124202728, "epoch": 6.6, "learning_rate": 2.81970776476271e-06, "loss": 0.3054, "step": 7814, "task_loss": 0.2952903211116791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1872561126947403, "epoch": 6.61, "learning_rate": 2.8136698466368798e-06, "loss": 0.3301, "step": 7815, "task_loss": 0.3981112837791443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15059933066368103, "epoch": 6.61, "learning_rate": 2.8076319285110495e-06, "loss": 0.2161, "step": 7816, "task_loss": 0.471725732088089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4065919518470764, "epoch": 6.61, "learning_rate": 2.8015940103852192e-06, "loss": 0.38, "step": 7817, "task_loss": 0.9121754765510559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34813132882118225, "epoch": 6.61, "learning_rate": 2.7955560922593894e-06, "loss": 0.3263, "step": 7818, "task_loss": 0.5133805871009827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40414801239967346, "epoch": 6.61, "learning_rate": 2.789518174133559e-06, "loss": 0.32, "step": 7819, "task_loss": 0.9553497433662415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3113330602645874, "epoch": 6.61, "learning_rate": 2.783480256007729e-06, "loss": 0.2742, "step": 7820, "task_loss": 1.0593557357788086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3978068232536316, "epoch": 6.61, "learning_rate": 2.7774423378818985e-06, "loss": 0.3194, "step": 7821, "task_loss": 1.1322081089019775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21584804356098175, "epoch": 6.61, "learning_rate": 2.7714044197560683e-06, "loss": 0.2679, "step": 7822, "task_loss": 0.5362482070922852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33229583501815796, "epoch": 6.61, "learning_rate": 2.765366501630238e-06, "loss": 0.404, "step": 7823, "task_loss": 0.7968634366989136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31536009907722473, "epoch": 6.61, "learning_rate": 2.7593285835044077e-06, "loss": 0.3988, "step": 7824, "task_loss": 0.8848350644111633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.09619975090026855, "epoch": 6.61, "learning_rate": 2.7532906653785774e-06, "loss": 0.1927, "step": 7825, "task_loss": 0.04058408364653587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2858276069164276, "epoch": 6.61, "learning_rate": 2.747252747252747e-06, "loss": 0.2934, "step": 7826, "task_loss": 0.9979034662246704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3922584056854248, "epoch": 6.62, "learning_rate": 2.741214829126917e-06, "loss": 0.399, "step": 7827, "task_loss": 1.3582017421722412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3518364429473877, "epoch": 6.62, "learning_rate": 2.735176911001087e-06, "loss": 0.35, "step": 7828, "task_loss": 1.0203566551208496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2179040014743805, "epoch": 6.62, "learning_rate": 2.7291389928752568e-06, "loss": 0.2694, "step": 7829, "task_loss": 0.6877054572105408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2079608291387558, "epoch": 6.62, "learning_rate": 2.7231010747494265e-06, "loss": 0.3922, "step": 7830, "task_loss": 0.42423394322395325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28286755084991455, "epoch": 6.62, "learning_rate": 2.717063156623596e-06, "loss": 0.341, "step": 7831, "task_loss": 0.4646259546279907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2169375866651535, "epoch": 6.62, "learning_rate": 2.711025238497766e-06, "loss": 0.1938, "step": 7832, "task_loss": 0.16754037141799927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1400134563446045, "epoch": 6.62, "learning_rate": 2.7049873203719357e-06, "loss": 0.3148, "step": 7833, "task_loss": 0.6180738806724548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2957953214645386, "epoch": 6.62, "learning_rate": 2.698949402246106e-06, "loss": 0.3421, "step": 7834, "task_loss": 1.0550485849380493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20371083915233612, "epoch": 6.62, "learning_rate": 2.6929114841202755e-06, "loss": 0.269, "step": 7835, "task_loss": 0.46303343772888184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17086276412010193, "epoch": 6.62, "learning_rate": 2.6868735659944453e-06, "loss": 0.3604, "step": 7836, "task_loss": 1.0650434494018555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2817338705062866, "epoch": 6.62, "learning_rate": 2.680835647868615e-06, "loss": 0.3292, "step": 7837, "task_loss": 0.29514241218566895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32456904649734497, "epoch": 6.63, "learning_rate": 2.6747977297427847e-06, "loss": 0.279, "step": 7838, "task_loss": 0.8603042960166931 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27427446842193604, "epoch": 6.63, "learning_rate": 2.668759811616955e-06, "loss": 0.3884, "step": 7839, "task_loss": 1.310402274131775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44368675351142883, "epoch": 6.63, "learning_rate": 2.6627218934911246e-06, "loss": 0.3285, "step": 7840, "task_loss": 1.4847962856292725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16925343871116638, "epoch": 6.63, "learning_rate": 2.6566839753652943e-06, "loss": 0.2743, "step": 7841, "task_loss": 0.012926310300827026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3711474537849426, "epoch": 6.63, "learning_rate": 2.650646057239464e-06, "loss": 0.4352, "step": 7842, "task_loss": 0.30901482701301575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2927723824977875, "epoch": 6.63, "learning_rate": 2.6446081391136337e-06, "loss": 0.397, "step": 7843, "task_loss": 0.48928651213645935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3235238790512085, "epoch": 6.63, "learning_rate": 2.638570220987804e-06, "loss": 0.2386, "step": 7844, "task_loss": 0.3520253598690033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22846920788288116, "epoch": 6.63, "learning_rate": 2.632532302861973e-06, "loss": 0.3139, "step": 7845, "task_loss": 0.7619253396987915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4414099454879761, "epoch": 6.63, "learning_rate": 2.626494384736143e-06, "loss": 0.3902, "step": 7846, "task_loss": 1.5078818798065186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3305150270462036, "epoch": 6.63, "learning_rate": 2.6204564666103126e-06, "loss": 0.3083, "step": 7847, "task_loss": 0.7912778258323669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2536662220954895, "epoch": 6.63, "learning_rate": 2.6144185484844824e-06, "loss": 0.2337, "step": 7848, "task_loss": 0.30832239985466003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20125135779380798, "epoch": 6.63, "learning_rate": 2.6083806303586525e-06, "loss": 0.2741, "step": 7849, "task_loss": 0.4208551049232483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3672713041305542, "epoch": 6.64, "learning_rate": 2.6023427122328222e-06, "loss": 0.2663, "step": 7850, "task_loss": 0.7185035347938538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6367277503013611, "epoch": 6.64, "learning_rate": 2.596304794106992e-06, "loss": 0.3662, "step": 7851, "task_loss": 0.5996759533882141 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5343726873397827, "epoch": 6.64, "learning_rate": 2.5902668759811617e-06, "loss": 0.3682, "step": 7852, "task_loss": 0.43795907497406006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2450995147228241, "epoch": 6.64, "learning_rate": 2.5842289578553314e-06, "loss": 0.2925, "step": 7853, "task_loss": 0.4793815016746521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16859784722328186, "epoch": 6.64, "learning_rate": 2.578191039729501e-06, "loss": 0.3277, "step": 7854, "task_loss": 0.0366772823035717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5129860639572144, "epoch": 6.64, "learning_rate": 2.5721531216036713e-06, "loss": 0.3686, "step": 7855, "task_loss": 0.9478433132171631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3270860016345978, "epoch": 6.64, "learning_rate": 2.566115203477841e-06, "loss": 0.3978, "step": 7856, "task_loss": 0.7920700907707214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22866550087928772, "epoch": 6.64, "learning_rate": 2.5600772853520107e-06, "loss": 0.2307, "step": 7857, "task_loss": 0.3095148503780365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4214126467704773, "epoch": 6.64, "learning_rate": 2.5540393672261805e-06, "loss": 0.2842, "step": 7858, "task_loss": 0.18533141911029816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37626227736473083, "epoch": 6.64, "learning_rate": 2.54800144910035e-06, "loss": 0.3011, "step": 7859, "task_loss": 0.6334812641143799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2395513355731964, "epoch": 6.64, "learning_rate": 2.5419635309745203e-06, "loss": 0.2517, "step": 7860, "task_loss": 0.7270482182502747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1549045294523239, "epoch": 6.64, "learning_rate": 2.53592561284869e-06, "loss": 0.3069, "step": 7861, "task_loss": 0.6192334890365601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26047009229660034, "epoch": 6.65, "learning_rate": 2.5298876947228598e-06, "loss": 0.2959, "step": 7862, "task_loss": 0.19074557721614838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2512180805206299, "epoch": 6.65, "learning_rate": 2.5238497765970295e-06, "loss": 0.3278, "step": 7863, "task_loss": 0.6209376454353333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3149875998497009, "epoch": 6.65, "learning_rate": 2.5178118584711992e-06, "loss": 0.3754, "step": 7864, "task_loss": 0.23959144949913025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4646156132221222, "epoch": 6.65, "learning_rate": 2.5117739403453694e-06, "loss": 0.4247, "step": 7865, "task_loss": 1.005577802658081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37485551834106445, "epoch": 6.65, "learning_rate": 2.505736022219539e-06, "loss": 0.4225, "step": 7866, "task_loss": 0.32001957297325134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22256316244602203, "epoch": 6.65, "learning_rate": 2.499698104093709e-06, "loss": 0.2779, "step": 7867, "task_loss": 0.25138506293296814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16069689393043518, "epoch": 6.65, "learning_rate": 2.4936601859678785e-06, "loss": 0.266, "step": 7868, "task_loss": 0.28903728723526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16692887246608734, "epoch": 6.65, "learning_rate": 2.4876222678420483e-06, "loss": 0.2633, "step": 7869, "task_loss": 0.25879353284835815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2109099179506302, "epoch": 6.65, "learning_rate": 2.481584349716218e-06, "loss": 0.2711, "step": 7870, "task_loss": 0.7519676089286804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6710304021835327, "epoch": 6.65, "learning_rate": 2.4755464315903877e-06, "loss": 0.3818, "step": 7871, "task_loss": 1.5434980392456055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15128664672374725, "epoch": 6.65, "learning_rate": 2.4695085134645574e-06, "loss": 0.4076, "step": 7872, "task_loss": 0.429664671421051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3181723952293396, "epoch": 6.65, "learning_rate": 2.463470595338727e-06, "loss": 0.3035, "step": 7873, "task_loss": 0.8402377367019653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19805902242660522, "epoch": 6.66, "learning_rate": 2.457432677212897e-06, "loss": 0.2212, "step": 7874, "task_loss": 0.20292223989963531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28453460335731506, "epoch": 6.66, "learning_rate": 2.4513947590870666e-06, "loss": 0.3628, "step": 7875, "task_loss": 0.8196080327033997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36242592334747314, "epoch": 6.66, "learning_rate": 2.4453568409612368e-06, "loss": 0.3478, "step": 7876, "task_loss": 0.07227878272533417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19338369369506836, "epoch": 6.66, "learning_rate": 2.4393189228354065e-06, "loss": 0.2797, "step": 7877, "task_loss": 0.9016945362091064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3498687148094177, "epoch": 6.66, "learning_rate": 2.433281004709576e-06, "loss": 0.2641, "step": 7878, "task_loss": 0.38190221786499023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24728785455226898, "epoch": 6.66, "learning_rate": 2.427243086583746e-06, "loss": 0.3211, "step": 7879, "task_loss": 0.2424931675195694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2155051827430725, "epoch": 6.66, "learning_rate": 2.4212051684579157e-06, "loss": 0.2559, "step": 7880, "task_loss": 0.411943644285202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3026154339313507, "epoch": 6.66, "learning_rate": 2.415167250332086e-06, "loss": 0.3378, "step": 7881, "task_loss": 1.2789057493209839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2789454460144043, "epoch": 6.66, "learning_rate": 2.4091293322062555e-06, "loss": 0.2569, "step": 7882, "task_loss": 0.690176784992218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21678873896598816, "epoch": 6.66, "learning_rate": 2.4030914140804253e-06, "loss": 0.2744, "step": 7883, "task_loss": 0.322922945022583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3468315005302429, "epoch": 6.66, "learning_rate": 2.397053495954595e-06, "loss": 0.3325, "step": 7884, "task_loss": 0.6196610331535339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16144081950187683, "epoch": 6.66, "learning_rate": 2.3910155778287647e-06, "loss": 0.3048, "step": 7885, "task_loss": 0.22956416010856628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30843669176101685, "epoch": 6.67, "learning_rate": 2.384977659702935e-06, "loss": 0.3696, "step": 7886, "task_loss": 0.45834460854530334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3306000828742981, "epoch": 6.67, "learning_rate": 2.3789397415771046e-06, "loss": 0.351, "step": 7887, "task_loss": 1.1747992038726807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15623460710048676, "epoch": 6.67, "learning_rate": 2.3729018234512743e-06, "loss": 0.268, "step": 7888, "task_loss": 0.7163028717041016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32011646032333374, "epoch": 6.67, "learning_rate": 2.366863905325444e-06, "loss": 0.2936, "step": 7889, "task_loss": 1.4384424686431885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43689435720443726, "epoch": 6.67, "learning_rate": 2.3608259871996137e-06, "loss": 0.2462, "step": 7890, "task_loss": 0.7369303107261658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14511778950691223, "epoch": 6.67, "learning_rate": 2.3547880690737835e-06, "loss": 0.2229, "step": 7891, "task_loss": 0.18630759418010712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41285789012908936, "epoch": 6.67, "learning_rate": 2.348750150947953e-06, "loss": 0.2798, "step": 7892, "task_loss": 0.4832552969455719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23340186476707458, "epoch": 6.67, "learning_rate": 2.342712232822123e-06, "loss": 0.3195, "step": 7893, "task_loss": 0.290322870016098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22053898870944977, "epoch": 6.67, "learning_rate": 2.3366743146962926e-06, "loss": 0.4392, "step": 7894, "task_loss": 0.6644067168235779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24344241619110107, "epoch": 6.67, "learning_rate": 2.3306363965704624e-06, "loss": 0.264, "step": 7895, "task_loss": 0.2051757276058197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.13733549416065216, "epoch": 6.67, "learning_rate": 2.324598478444632e-06, "loss": 0.287, "step": 7896, "task_loss": 0.5162937641143799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.410758376121521, "epoch": 6.67, "learning_rate": 2.3185605603188022e-06, "loss": 0.2955, "step": 7897, "task_loss": 1.1313782930374146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15754634141921997, "epoch": 6.68, "learning_rate": 2.312522642192972e-06, "loss": 0.293, "step": 7898, "task_loss": 0.3658483624458313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.257119357585907, "epoch": 6.68, "learning_rate": 2.3064847240671417e-06, "loss": 0.3038, "step": 7899, "task_loss": 0.4583624601364136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2170518934726715, "epoch": 6.68, "learning_rate": 2.3004468059413114e-06, "loss": 0.3505, "step": 7900, "task_loss": 0.16309964656829834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24254073202610016, "epoch": 6.68, "learning_rate": 2.294408887815481e-06, "loss": 0.3067, "step": 7901, "task_loss": 0.8957123160362244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27926453948020935, "epoch": 6.68, "learning_rate": 2.2883709696896513e-06, "loss": 0.3576, "step": 7902, "task_loss": 0.8026703596115112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16419368982315063, "epoch": 6.68, "learning_rate": 2.282333051563821e-06, "loss": 0.2541, "step": 7903, "task_loss": 0.526229202747345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19361138343811035, "epoch": 6.68, "learning_rate": 2.2762951334379907e-06, "loss": 0.3887, "step": 7904, "task_loss": 1.4042726755142212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17034214735031128, "epoch": 6.68, "learning_rate": 2.2702572153121605e-06, "loss": 0.3618, "step": 7905, "task_loss": 0.15841732919216156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47590386867523193, "epoch": 6.68, "learning_rate": 2.26421929718633e-06, "loss": 0.382, "step": 7906, "task_loss": 0.4087643623352051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23040196299552917, "epoch": 6.68, "learning_rate": 2.2581813790605003e-06, "loss": 0.2542, "step": 7907, "task_loss": 0.18354520201683044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2786344587802887, "epoch": 6.68, "learning_rate": 2.25214346093467e-06, "loss": 0.3173, "step": 7908, "task_loss": 0.4397277235984802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14116652309894562, "epoch": 6.69, "learning_rate": 2.2461055428088398e-06, "loss": 0.327, "step": 7909, "task_loss": 0.013777644373476505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22628964483737946, "epoch": 6.69, "learning_rate": 2.2400676246830095e-06, "loss": 0.251, "step": 7910, "task_loss": 1.3941057920455933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1774052232503891, "epoch": 6.69, "learning_rate": 2.2340297065571792e-06, "loss": 0.2598, "step": 7911, "task_loss": 0.04826068878173828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17893150448799133, "epoch": 6.69, "learning_rate": 2.227991788431349e-06, "loss": 0.2268, "step": 7912, "task_loss": 0.4433581829071045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29776430130004883, "epoch": 6.69, "learning_rate": 2.2219538703055187e-06, "loss": 0.3594, "step": 7913, "task_loss": 0.6426287293434143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.172685444355011, "epoch": 6.69, "learning_rate": 2.2159159521796884e-06, "loss": 0.2859, "step": 7914, "task_loss": 0.5530482530593872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41440972685813904, "epoch": 6.69, "learning_rate": 2.209878034053858e-06, "loss": 0.3192, "step": 7915, "task_loss": 1.6321061849594116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18748615682125092, "epoch": 6.69, "learning_rate": 2.203840115928028e-06, "loss": 0.2886, "step": 7916, "task_loss": 0.26245152950286865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.295779287815094, "epoch": 6.69, "learning_rate": 2.197802197802198e-06, "loss": 0.3626, "step": 7917, "task_loss": 0.19294299185276031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18876810371875763, "epoch": 6.69, "learning_rate": 2.1917642796763677e-06, "loss": 0.2843, "step": 7918, "task_loss": 0.050800710916519165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4296872019767761, "epoch": 6.69, "learning_rate": 2.1857263615505374e-06, "loss": 0.295, "step": 7919, "task_loss": 0.4813189208507538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38008639216423035, "epoch": 6.69, "learning_rate": 2.179688443424707e-06, "loss": 0.337, "step": 7920, "task_loss": 0.36738958954811096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28309062123298645, "epoch": 6.7, "learning_rate": 2.173650525298877e-06, "loss": 0.283, "step": 7921, "task_loss": 0.5545055866241455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1828785091638565, "epoch": 6.7, "learning_rate": 2.1676126071730466e-06, "loss": 0.3604, "step": 7922, "task_loss": 0.46906936168670654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39464104175567627, "epoch": 6.7, "learning_rate": 2.1615746890472168e-06, "loss": 0.3864, "step": 7923, "task_loss": 0.40161725878715515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4153671860694885, "epoch": 6.7, "learning_rate": 2.1555367709213865e-06, "loss": 0.3615, "step": 7924, "task_loss": 0.530505359172821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20175360143184662, "epoch": 6.7, "learning_rate": 2.149498852795556e-06, "loss": 0.2912, "step": 7925, "task_loss": 0.9041904807090759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27381274104118347, "epoch": 6.7, "learning_rate": 2.143460934669726e-06, "loss": 0.2964, "step": 7926, "task_loss": 0.29992374777793884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3550863265991211, "epoch": 6.7, "learning_rate": 2.1374230165438957e-06, "loss": 0.2812, "step": 7927, "task_loss": 0.5639547109603882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4246624708175659, "epoch": 6.7, "learning_rate": 2.131385098418066e-06, "loss": 0.3612, "step": 7928, "task_loss": 0.8672881126403809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2313038408756256, "epoch": 6.7, "learning_rate": 2.1253471802922355e-06, "loss": 0.2552, "step": 7929, "task_loss": 0.2760171592235565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4265955686569214, "epoch": 6.7, "learning_rate": 2.1193092621664052e-06, "loss": 0.393, "step": 7930, "task_loss": 0.5556285381317139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18905827403068542, "epoch": 6.7, "learning_rate": 2.113271344040575e-06, "loss": 0.3295, "step": 7931, "task_loss": 0.07954779267311096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1920003592967987, "epoch": 6.7, "learning_rate": 2.1072334259147447e-06, "loss": 0.358, "step": 7932, "task_loss": 0.3587722182273865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21588607132434845, "epoch": 6.71, "learning_rate": 2.101195507788915e-06, "loss": 0.2737, "step": 7933, "task_loss": 0.7676540613174438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21186354756355286, "epoch": 6.71, "learning_rate": 2.0951575896630846e-06, "loss": 0.2769, "step": 7934, "task_loss": 0.3564017117023468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30953115224838257, "epoch": 6.71, "learning_rate": 2.089119671537254e-06, "loss": 0.343, "step": 7935, "task_loss": 0.48304739594459534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21278584003448486, "epoch": 6.71, "learning_rate": 2.0830817534114236e-06, "loss": 0.3105, "step": 7936, "task_loss": 0.6912065744400024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.279635488986969, "epoch": 6.71, "learning_rate": 2.0770438352855933e-06, "loss": 0.2957, "step": 7937, "task_loss": 0.6083744764328003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16653001308441162, "epoch": 6.71, "learning_rate": 2.0710059171597635e-06, "loss": 0.216, "step": 7938, "task_loss": 0.1500818282365799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3697958290576935, "epoch": 6.71, "learning_rate": 2.064967999033933e-06, "loss": 0.3086, "step": 7939, "task_loss": 0.8267338275909424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5258051156997681, "epoch": 6.71, "learning_rate": 2.058930080908103e-06, "loss": 0.4224, "step": 7940, "task_loss": 0.9614468812942505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19108134508132935, "epoch": 6.71, "learning_rate": 2.0528921627822726e-06, "loss": 0.3085, "step": 7941, "task_loss": 0.516747772693634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25276464223861694, "epoch": 6.71, "learning_rate": 2.0468542446564424e-06, "loss": 0.2134, "step": 7942, "task_loss": 0.39300620555877686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31674352288246155, "epoch": 6.71, "learning_rate": 2.040816326530612e-06, "loss": 0.2342, "step": 7943, "task_loss": 0.40965473651885986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.11096437275409698, "epoch": 6.71, "learning_rate": 2.0347784084047822e-06, "loss": 0.2357, "step": 7944, "task_loss": 0.3569726049900055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20480112731456757, "epoch": 6.72, "learning_rate": 2.028740490278952e-06, "loss": 0.2533, "step": 7945, "task_loss": 0.830033540725708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32517170906066895, "epoch": 6.72, "learning_rate": 2.0227025721531217e-06, "loss": 0.2848, "step": 7946, "task_loss": 0.7237549424171448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14681537449359894, "epoch": 6.72, "learning_rate": 2.0166646540272914e-06, "loss": 0.2515, "step": 7947, "task_loss": 0.3702457547187805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2509031593799591, "epoch": 6.72, "learning_rate": 2.010626735901461e-06, "loss": 0.3236, "step": 7948, "task_loss": 0.9990302324295044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1465664505958557, "epoch": 6.72, "learning_rate": 2.0045888177756313e-06, "loss": 0.1844, "step": 7949, "task_loss": 0.42142030596733093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42340749502182007, "epoch": 6.72, "learning_rate": 1.998550899649801e-06, "loss": 0.3404, "step": 7950, "task_loss": 0.6303499341011047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.47147268056869507, "epoch": 6.72, "learning_rate": 1.9925129815239707e-06, "loss": 0.3102, "step": 7951, "task_loss": 0.9318164587020874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43019574880599976, "epoch": 6.72, "learning_rate": 1.9864750633981404e-06, "loss": 0.2639, "step": 7952, "task_loss": 0.5483757853507996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2852446436882019, "epoch": 6.72, "learning_rate": 1.98043714527231e-06, "loss": 0.344, "step": 7953, "task_loss": 0.09458243101835251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15166445076465607, "epoch": 6.72, "learning_rate": 1.9743992271464803e-06, "loss": 0.238, "step": 7954, "task_loss": 0.45020076632499695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30763429403305054, "epoch": 6.72, "learning_rate": 1.96836130902065e-06, "loss": 0.3465, "step": 7955, "task_loss": 0.8793331384658813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17032763361930847, "epoch": 6.72, "learning_rate": 1.9623233908948198e-06, "loss": 0.358, "step": 7956, "task_loss": 0.5455321669578552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1940140724182129, "epoch": 6.73, "learning_rate": 1.9562854727689895e-06, "loss": 0.3292, "step": 7957, "task_loss": 0.11929430812597275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32110828161239624, "epoch": 6.73, "learning_rate": 1.9502475546431592e-06, "loss": 0.2877, "step": 7958, "task_loss": 0.7267234325408936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3669602572917938, "epoch": 6.73, "learning_rate": 1.944209636517329e-06, "loss": 0.3097, "step": 7959, "task_loss": 0.7395385503768921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2860534191131592, "epoch": 6.73, "learning_rate": 1.9381717183914987e-06, "loss": 0.2797, "step": 7960, "task_loss": 0.2837945818901062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28326329588890076, "epoch": 6.73, "learning_rate": 1.9321338002656684e-06, "loss": 0.2837, "step": 7961, "task_loss": 0.30190205574035645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22789062559604645, "epoch": 6.73, "learning_rate": 1.926095882139838e-06, "loss": 0.3687, "step": 7962, "task_loss": 0.04803982377052307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20868214964866638, "epoch": 6.73, "learning_rate": 1.920057964014008e-06, "loss": 0.2302, "step": 7963, "task_loss": 0.1920211911201477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3831107020378113, "epoch": 6.73, "learning_rate": 1.9140200458881776e-06, "loss": 0.2831, "step": 7964, "task_loss": 0.15343137085437775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.12626034021377563, "epoch": 6.73, "learning_rate": 1.9079821277623477e-06, "loss": 0.3052, "step": 7965, "task_loss": 0.28276708722114563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18034054338932037, "epoch": 6.73, "learning_rate": 1.9019442096365174e-06, "loss": 0.3169, "step": 7966, "task_loss": 0.31029602885246277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27745315432548523, "epoch": 6.73, "learning_rate": 1.8959062915106872e-06, "loss": 0.2576, "step": 7967, "task_loss": 0.25307080149650574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3063080906867981, "epoch": 6.73, "learning_rate": 1.8898683733848569e-06, "loss": 0.3825, "step": 7968, "task_loss": 1.4786051511764526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31577903032302856, "epoch": 6.74, "learning_rate": 1.8838304552590266e-06, "loss": 0.2541, "step": 7969, "task_loss": 0.17465519905090332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43017125129699707, "epoch": 6.74, "learning_rate": 1.8777925371331968e-06, "loss": 0.4221, "step": 7970, "task_loss": 1.0880897045135498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20631766319274902, "epoch": 6.74, "learning_rate": 1.8717546190073665e-06, "loss": 0.333, "step": 7971, "task_loss": 0.3785199522972107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23133569955825806, "epoch": 6.74, "learning_rate": 1.8657167008815362e-06, "loss": 0.3133, "step": 7972, "task_loss": 0.10537660121917725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32779860496520996, "epoch": 6.74, "learning_rate": 1.859678782755706e-06, "loss": 0.3384, "step": 7973, "task_loss": 0.8620181083679199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4288841485977173, "epoch": 6.74, "learning_rate": 1.8536408646298756e-06, "loss": 0.4423, "step": 7974, "task_loss": 0.8313360214233398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15801064670085907, "epoch": 6.74, "learning_rate": 1.8476029465040456e-06, "loss": 0.356, "step": 7975, "task_loss": 0.19901259243488312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25173044204711914, "epoch": 6.74, "learning_rate": 1.8415650283782153e-06, "loss": 0.3224, "step": 7976, "task_loss": 0.5323078036308289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17723192274570465, "epoch": 6.74, "learning_rate": 1.835527110252385e-06, "loss": 0.2824, "step": 7977, "task_loss": 0.2538309693336487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3579541742801666, "epoch": 6.74, "learning_rate": 1.8294891921265548e-06, "loss": 0.3325, "step": 7978, "task_loss": 0.44242650270462036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20745150744915009, "epoch": 6.74, "learning_rate": 1.8234512740007245e-06, "loss": 0.3437, "step": 7979, "task_loss": 1.6390907764434814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.204509899020195, "epoch": 6.75, "learning_rate": 1.8174133558748946e-06, "loss": 0.2914, "step": 7980, "task_loss": 0.7780982851982117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24483557045459747, "epoch": 6.75, "learning_rate": 1.8113754377490644e-06, "loss": 0.3046, "step": 7981, "task_loss": 0.30658265948295593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3993033766746521, "epoch": 6.75, "learning_rate": 1.805337519623234e-06, "loss": 0.3518, "step": 7982, "task_loss": 0.821790337562561 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22531205415725708, "epoch": 6.75, "learning_rate": 1.7992996014974038e-06, "loss": 0.4457, "step": 7983, "task_loss": 1.013142466545105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25335782766342163, "epoch": 6.75, "learning_rate": 1.7932616833715735e-06, "loss": 0.3346, "step": 7984, "task_loss": 0.12422861903905869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23694941401481628, "epoch": 6.75, "learning_rate": 1.7872237652457432e-06, "loss": 0.3362, "step": 7985, "task_loss": 0.23627586662769318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2700069546699524, "epoch": 6.75, "learning_rate": 1.7811858471199132e-06, "loss": 0.27, "step": 7986, "task_loss": 0.32429301738739014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1919606328010559, "epoch": 6.75, "learning_rate": 1.775147928994083e-06, "loss": 0.3776, "step": 7987, "task_loss": 0.4111919403076172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6294015645980835, "epoch": 6.75, "learning_rate": 1.7691100108682526e-06, "loss": 0.3965, "step": 7988, "task_loss": 0.6114701628684998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19678373634815216, "epoch": 6.75, "learning_rate": 1.7630720927424224e-06, "loss": 0.281, "step": 7989, "task_loss": 0.39588478207588196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2907596528530121, "epoch": 6.75, "learning_rate": 1.757034174616592e-06, "loss": 0.3265, "step": 7990, "task_loss": 0.6766212582588196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41545677185058594, "epoch": 6.75, "learning_rate": 1.7509962564907622e-06, "loss": 0.3228, "step": 7991, "task_loss": 0.4498574137687683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34787517786026, "epoch": 6.76, "learning_rate": 1.744958338364932e-06, "loss": 0.2787, "step": 7992, "task_loss": 0.4738418459892273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27309414744377136, "epoch": 6.76, "learning_rate": 1.7389204202391017e-06, "loss": 0.3203, "step": 7993, "task_loss": 0.20309272408485413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.10542068630456924, "epoch": 6.76, "learning_rate": 1.7328825021132714e-06, "loss": 0.2481, "step": 7994, "task_loss": 0.016744893044233322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.447588175535202, "epoch": 6.76, "learning_rate": 1.7268445839874411e-06, "loss": 0.2972, "step": 7995, "task_loss": 1.2155694961547852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33845996856689453, "epoch": 6.76, "learning_rate": 1.720806665861611e-06, "loss": 0.2562, "step": 7996, "task_loss": 1.2374494075775146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3010985851287842, "epoch": 6.76, "learning_rate": 1.7147687477357808e-06, "loss": 0.2986, "step": 7997, "task_loss": 1.1454955339431763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4735068678855896, "epoch": 6.76, "learning_rate": 1.7087308296099505e-06, "loss": 0.3103, "step": 7998, "task_loss": 1.0401740074157715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.345500111579895, "epoch": 6.76, "learning_rate": 1.7026929114841202e-06, "loss": 0.319, "step": 7999, "task_loss": 0.24495534598827362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2186127007007599, "epoch": 6.76, "learning_rate": 1.69665499335829e-06, "loss": 0.3393, "step": 8000, "task_loss": 0.580116868019104 }, { "epoch": 6.76, "eval_accuracy": 0.9204356435643565, "eval_loss": 0.19777916371822357, "eval_runtime": 315.287, "eval_samples_per_second": 80.086, "eval_steps_per_second": 0.628, "step": 8000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3183669447898865, "epoch": 6.76, "learning_rate": 1.69061707523246e-06, "loss": 0.412, "step": 8001, "task_loss": 0.24300865828990936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3969162404537201, "epoch": 6.76, "learning_rate": 1.6845791571066298e-06, "loss": 0.26, "step": 8002, "task_loss": 0.1728476732969284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4217739999294281, "epoch": 6.76, "learning_rate": 1.6785412389807996e-06, "loss": 0.3468, "step": 8003, "task_loss": 0.8634433746337891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2720533311367035, "epoch": 6.77, "learning_rate": 1.6725033208549693e-06, "loss": 0.258, "step": 8004, "task_loss": 0.4465816915035248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3825600743293762, "epoch": 6.77, "learning_rate": 1.666465402729139e-06, "loss": 0.3524, "step": 8005, "task_loss": 1.2596490383148193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27043306827545166, "epoch": 6.77, "learning_rate": 1.6604274846033087e-06, "loss": 0.3047, "step": 8006, "task_loss": 0.8486908674240112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2223532795906067, "epoch": 6.77, "learning_rate": 1.6543895664774787e-06, "loss": 0.3253, "step": 8007, "task_loss": 0.6109075546264648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5123768448829651, "epoch": 6.77, "learning_rate": 1.6483516483516484e-06, "loss": 0.3242, "step": 8008, "task_loss": 0.3796651363372803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.343381404876709, "epoch": 6.77, "learning_rate": 1.6423137302258181e-06, "loss": 0.3536, "step": 8009, "task_loss": 0.42433395981788635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23733410239219666, "epoch": 6.77, "learning_rate": 1.6362758120999878e-06, "loss": 0.3252, "step": 8010, "task_loss": 0.25709661841392517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23585985600948334, "epoch": 6.77, "learning_rate": 1.6302378939741576e-06, "loss": 0.3044, "step": 8011, "task_loss": 0.6627354025840759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22130192816257477, "epoch": 6.77, "learning_rate": 1.6241999758483277e-06, "loss": 0.2996, "step": 8012, "task_loss": 0.9312736988067627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40501105785369873, "epoch": 6.77, "learning_rate": 1.6181620577224974e-06, "loss": 0.4382, "step": 8013, "task_loss": 0.8539422750473022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33113813400268555, "epoch": 6.77, "learning_rate": 1.6121241395966672e-06, "loss": 0.2814, "step": 8014, "task_loss": 0.8542001843452454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17742842435836792, "epoch": 6.77, "learning_rate": 1.6060862214708369e-06, "loss": 0.3456, "step": 8015, "task_loss": 0.32290029525756836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.6013045907020569, "epoch": 6.78, "learning_rate": 1.6000483033450066e-06, "loss": 0.3648, "step": 8016, "task_loss": 0.6162604093551636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32732242345809937, "epoch": 6.78, "learning_rate": 1.5940103852191767e-06, "loss": 0.3189, "step": 8017, "task_loss": 0.5021353363990784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33298248052597046, "epoch": 6.78, "learning_rate": 1.5879724670933463e-06, "loss": 0.2973, "step": 8018, "task_loss": 1.1255041360855103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30120164155960083, "epoch": 6.78, "learning_rate": 1.581934548967516e-06, "loss": 0.3193, "step": 8019, "task_loss": 0.1960994452238083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2275829017162323, "epoch": 6.78, "learning_rate": 1.5758966308416857e-06, "loss": 0.3486, "step": 8020, "task_loss": 0.50175940990448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33161860704421997, "epoch": 6.78, "learning_rate": 1.5698587127158554e-06, "loss": 0.47, "step": 8021, "task_loss": 0.7204383015632629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39914026856422424, "epoch": 6.78, "learning_rate": 1.5638207945900256e-06, "loss": 0.447, "step": 8022, "task_loss": 0.20828816294670105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21714815497398376, "epoch": 6.78, "learning_rate": 1.557782876464195e-06, "loss": 0.2865, "step": 8023, "task_loss": 0.05916708707809448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1046949103474617, "epoch": 6.78, "learning_rate": 1.551744958338365e-06, "loss": 0.2308, "step": 8024, "task_loss": 0.5303388237953186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24957597255706787, "epoch": 6.78, "learning_rate": 1.5457070402125348e-06, "loss": 0.2315, "step": 8025, "task_loss": 0.531966507434845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23982229828834534, "epoch": 6.78, "learning_rate": 1.5396691220867047e-06, "loss": 0.2819, "step": 8026, "task_loss": 0.48429805040359497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3560762405395508, "epoch": 6.78, "learning_rate": 1.5336312039608744e-06, "loss": 0.3456, "step": 8027, "task_loss": 0.21281929314136505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19238969683647156, "epoch": 6.79, "learning_rate": 1.5275932858350441e-06, "loss": 0.3772, "step": 8028, "task_loss": 0.5471962690353394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.33166226744651794, "epoch": 6.79, "learning_rate": 1.521555367709214e-06, "loss": 0.2759, "step": 8029, "task_loss": 0.8484209179878235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18399450182914734, "epoch": 6.79, "learning_rate": 1.5155174495833838e-06, "loss": 0.2507, "step": 8030, "task_loss": 0.2805570960044861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2931387424468994, "epoch": 6.79, "learning_rate": 1.5094795314575535e-06, "loss": 0.3041, "step": 8031, "task_loss": 0.3960728347301483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.12830469012260437, "epoch": 6.79, "learning_rate": 1.5034416133317232e-06, "loss": 0.28, "step": 8032, "task_loss": 0.7053889036178589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2489912509918213, "epoch": 6.79, "learning_rate": 1.497403695205893e-06, "loss": 0.2665, "step": 8033, "task_loss": 0.3241889774799347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18245241045951843, "epoch": 6.79, "learning_rate": 1.491365777080063e-06, "loss": 0.2851, "step": 8034, "task_loss": 0.10006110370159149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.13691267371177673, "epoch": 6.79, "learning_rate": 1.4853278589542326e-06, "loss": 0.241, "step": 8035, "task_loss": 0.5646165609359741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18260696530342102, "epoch": 6.79, "learning_rate": 1.4792899408284024e-06, "loss": 0.3157, "step": 8036, "task_loss": 0.4359910190105438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2580518126487732, "epoch": 6.79, "learning_rate": 1.4732520227025723e-06, "loss": 0.316, "step": 8037, "task_loss": 0.43617957830429077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3602808117866516, "epoch": 6.79, "learning_rate": 1.467214104576742e-06, "loss": 0.3521, "step": 8038, "task_loss": 0.45752713084220886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21448150277137756, "epoch": 6.79, "learning_rate": 1.461176186450912e-06, "loss": 0.3374, "step": 8039, "task_loss": 0.7821329236030579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31888994574546814, "epoch": 6.8, "learning_rate": 1.4551382683250817e-06, "loss": 0.2799, "step": 8040, "task_loss": 0.5619816780090332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24327656626701355, "epoch": 6.8, "learning_rate": 1.4491003501992514e-06, "loss": 0.3685, "step": 8041, "task_loss": 0.26906102895736694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16191744804382324, "epoch": 6.8, "learning_rate": 1.4430624320734211e-06, "loss": 0.2893, "step": 8042, "task_loss": 0.3937647044658661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22746962308883667, "epoch": 6.8, "learning_rate": 1.4370245139475908e-06, "loss": 0.224, "step": 8043, "task_loss": 0.15217357873916626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16601556539535522, "epoch": 6.8, "learning_rate": 1.4309865958217608e-06, "loss": 0.2644, "step": 8044, "task_loss": 0.4684045612812042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20447692275047302, "epoch": 6.8, "learning_rate": 1.4249486776959305e-06, "loss": 0.2215, "step": 8045, "task_loss": 0.48136505484580994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3991287052631378, "epoch": 6.8, "learning_rate": 1.4189107595701002e-06, "loss": 0.367, "step": 8046, "task_loss": 0.8106330037117004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34273314476013184, "epoch": 6.8, "learning_rate": 1.4128728414442702e-06, "loss": 0.2638, "step": 8047, "task_loss": 0.17120568454265594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24648833274841309, "epoch": 6.8, "learning_rate": 1.4068349233184399e-06, "loss": 0.2974, "step": 8048, "task_loss": 0.13945113122463226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29230666160583496, "epoch": 6.8, "learning_rate": 1.4007970051926096e-06, "loss": 0.3117, "step": 8049, "task_loss": 0.09335968643426895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17720624804496765, "epoch": 6.8, "learning_rate": 1.3947590870667795e-06, "loss": 0.2894, "step": 8050, "task_loss": 0.48743289709091187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16024263203144073, "epoch": 6.81, "learning_rate": 1.3887211689409493e-06, "loss": 0.2163, "step": 8051, "task_loss": 0.13241131603717804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25895488262176514, "epoch": 6.81, "learning_rate": 1.382683250815119e-06, "loss": 0.311, "step": 8052, "task_loss": 0.7730100750923157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3882974088191986, "epoch": 6.81, "learning_rate": 1.3766453326892887e-06, "loss": 0.2787, "step": 8053, "task_loss": 0.10828300565481186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.12716780602931976, "epoch": 6.81, "learning_rate": 1.3706074145634584e-06, "loss": 0.2224, "step": 8054, "task_loss": 0.31047144532203674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2503880560398102, "epoch": 6.81, "learning_rate": 1.3645694964376284e-06, "loss": 0.2147, "step": 8055, "task_loss": 0.7098690867424011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20645177364349365, "epoch": 6.81, "learning_rate": 1.358531578311798e-06, "loss": 0.2773, "step": 8056, "task_loss": 0.6831003427505493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4532225728034973, "epoch": 6.81, "learning_rate": 1.3524936601859678e-06, "loss": 0.3769, "step": 8057, "task_loss": 1.4172208309173584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20299270749092102, "epoch": 6.81, "learning_rate": 1.3464557420601378e-06, "loss": 0.254, "step": 8058, "task_loss": 0.7395471930503845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3072195053100586, "epoch": 6.81, "learning_rate": 1.3404178239343075e-06, "loss": 0.3359, "step": 8059, "task_loss": 0.1225191131234169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34933167695999146, "epoch": 6.81, "learning_rate": 1.3343799058084774e-06, "loss": 0.3722, "step": 8060, "task_loss": 0.33849799633026123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18557000160217285, "epoch": 6.81, "learning_rate": 1.3283419876826471e-06, "loss": 0.2715, "step": 8061, "task_loss": 0.11513186246156693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3575012683868408, "epoch": 6.81, "learning_rate": 1.3223040695568169e-06, "loss": 0.3972, "step": 8062, "task_loss": 0.5876013040542603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21574467420578003, "epoch": 6.82, "learning_rate": 1.3162661514309866e-06, "loss": 0.3562, "step": 8063, "task_loss": 0.5496799349784851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2965897023677826, "epoch": 6.82, "learning_rate": 1.3102282333051563e-06, "loss": 0.2286, "step": 8064, "task_loss": 0.5375605225563049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27486860752105713, "epoch": 6.82, "learning_rate": 1.3041903151793263e-06, "loss": 0.3349, "step": 8065, "task_loss": 0.2524043321609497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.42896702885627747, "epoch": 6.82, "learning_rate": 1.298152397053496e-06, "loss": 0.3881, "step": 8066, "task_loss": 0.3314337432384491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25126421451568604, "epoch": 6.82, "learning_rate": 1.2921144789276657e-06, "loss": 0.3232, "step": 8067, "task_loss": 1.0493206977844238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24218739569187164, "epoch": 6.82, "learning_rate": 1.2860765608018356e-06, "loss": 0.3455, "step": 8068, "task_loss": 1.0824576616287231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20783577859401703, "epoch": 6.82, "learning_rate": 1.2800386426760054e-06, "loss": 0.3512, "step": 8069, "task_loss": 0.7060831785202026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4064688980579376, "epoch": 6.82, "learning_rate": 1.274000724550175e-06, "loss": 0.3707, "step": 8070, "task_loss": 0.5438362956047058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36701062321662903, "epoch": 6.82, "learning_rate": 1.267962806424345e-06, "loss": 0.3064, "step": 8071, "task_loss": 0.61564040184021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3394377827644348, "epoch": 6.82, "learning_rate": 1.2619248882985147e-06, "loss": 0.3207, "step": 8072, "task_loss": 0.38762035965919495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34342658519744873, "epoch": 6.82, "learning_rate": 1.2558869701726847e-06, "loss": 0.2373, "step": 8073, "task_loss": 0.9389482736587524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4504706859588623, "epoch": 6.82, "learning_rate": 1.2498490520468544e-06, "loss": 0.3782, "step": 8074, "task_loss": 1.2320599555969238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.0737449899315834, "epoch": 6.83, "learning_rate": 1.2438111339210241e-06, "loss": 0.3235, "step": 8075, "task_loss": 0.05896284803748131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26180940866470337, "epoch": 6.83, "learning_rate": 1.2377732157951939e-06, "loss": 0.2572, "step": 8076, "task_loss": 0.22941842675209045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25186991691589355, "epoch": 6.83, "learning_rate": 1.2317352976693636e-06, "loss": 0.316, "step": 8077, "task_loss": 0.3299866318702698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.13357414305210114, "epoch": 6.83, "learning_rate": 1.2256973795435333e-06, "loss": 0.2642, "step": 8078, "task_loss": 0.08596650511026382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27413231134414673, "epoch": 6.83, "learning_rate": 1.2196594614177032e-06, "loss": 0.2522, "step": 8079, "task_loss": 0.7011333703994751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.298433780670166, "epoch": 6.83, "learning_rate": 1.213621543291873e-06, "loss": 0.3027, "step": 8080, "task_loss": 0.8602815270423889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19633105397224426, "epoch": 6.83, "learning_rate": 1.207583625166043e-06, "loss": 0.3098, "step": 8081, "task_loss": 0.5440811514854431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.12785178422927856, "epoch": 6.83, "learning_rate": 1.2015457070402126e-06, "loss": 0.3051, "step": 8082, "task_loss": 0.03942573443055153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30170437693595886, "epoch": 6.83, "learning_rate": 1.1955077889143823e-06, "loss": 0.3063, "step": 8083, "task_loss": 1.2773337364196777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.15988485515117645, "epoch": 6.83, "learning_rate": 1.1894698707885523e-06, "loss": 0.3359, "step": 8084, "task_loss": 1.1032382249832153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40748053789138794, "epoch": 6.83, "learning_rate": 1.183431952662722e-06, "loss": 0.4591, "step": 8085, "task_loss": 0.5076593160629272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.37493035197257996, "epoch": 6.83, "learning_rate": 1.1773940345368917e-06, "loss": 0.3179, "step": 8086, "task_loss": 0.138167142868042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28917574882507324, "epoch": 6.84, "learning_rate": 1.1713561164110615e-06, "loss": 0.3233, "step": 8087, "task_loss": 0.3476995825767517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2779361605644226, "epoch": 6.84, "learning_rate": 1.1653181982852312e-06, "loss": 0.2479, "step": 8088, "task_loss": 0.4174083173274994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4206591546535492, "epoch": 6.84, "learning_rate": 1.1592802801594011e-06, "loss": 0.3539, "step": 8089, "task_loss": 0.5294107794761658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18704599142074585, "epoch": 6.84, "learning_rate": 1.1532423620335708e-06, "loss": 0.2835, "step": 8090, "task_loss": 0.3899939954280853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2788778841495514, "epoch": 6.84, "learning_rate": 1.1472044439077406e-06, "loss": 0.2943, "step": 8091, "task_loss": 0.6307648420333862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21993741393089294, "epoch": 6.84, "learning_rate": 1.1411665257819105e-06, "loss": 0.292, "step": 8092, "task_loss": 0.8136159181594849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36517947912216187, "epoch": 6.84, "learning_rate": 1.1351286076560802e-06, "loss": 0.3263, "step": 8093, "task_loss": 1.1364091634750366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.08684787899255753, "epoch": 6.84, "learning_rate": 1.1290906895302502e-06, "loss": 0.3038, "step": 8094, "task_loss": 0.07113494724035263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31285181641578674, "epoch": 6.84, "learning_rate": 1.1230527714044199e-06, "loss": 0.2965, "step": 8095, "task_loss": 0.43261706829071045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.38133564591407776, "epoch": 6.84, "learning_rate": 1.1170148532785896e-06, "loss": 0.2733, "step": 8096, "task_loss": 0.4808973968029022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25850045680999756, "epoch": 6.84, "learning_rate": 1.1109769351527593e-06, "loss": 0.3457, "step": 8097, "task_loss": 0.5472986102104187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20295509696006775, "epoch": 6.84, "learning_rate": 1.104939017026929e-06, "loss": 0.2705, "step": 8098, "task_loss": 0.559759259223938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16229066252708435, "epoch": 6.85, "learning_rate": 1.098901098901099e-06, "loss": 0.2744, "step": 8099, "task_loss": 0.1694437563419342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3454825282096863, "epoch": 6.85, "learning_rate": 1.0928631807752687e-06, "loss": 0.2898, "step": 8100, "task_loss": 0.32291844487190247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17264291644096375, "epoch": 6.85, "learning_rate": 1.0868252626494384e-06, "loss": 0.3033, "step": 8101, "task_loss": 0.6828551292419434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18552936613559723, "epoch": 6.85, "learning_rate": 1.0807873445236084e-06, "loss": 0.2657, "step": 8102, "task_loss": 0.03817024827003479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3672095537185669, "epoch": 6.85, "learning_rate": 1.074749426397778e-06, "loss": 0.3969, "step": 8103, "task_loss": 0.3835495114326477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17212480306625366, "epoch": 6.85, "learning_rate": 1.0687115082719478e-06, "loss": 0.3112, "step": 8104, "task_loss": 0.29686403274536133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.261046826839447, "epoch": 6.85, "learning_rate": 1.0626735901461178e-06, "loss": 0.2761, "step": 8105, "task_loss": 0.6027694940567017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2947114109992981, "epoch": 6.85, "learning_rate": 1.0566356720202875e-06, "loss": 0.2806, "step": 8106, "task_loss": 0.7910678386688232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24235139787197113, "epoch": 6.85, "learning_rate": 1.0505977538944574e-06, "loss": 0.2588, "step": 8107, "task_loss": 2.7524631023406982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2062629759311676, "epoch": 6.85, "learning_rate": 1.044559835768627e-06, "loss": 0.3039, "step": 8108, "task_loss": 0.5322403311729431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32488512992858887, "epoch": 6.85, "learning_rate": 1.0385219176427967e-06, "loss": 0.3512, "step": 8109, "task_loss": 0.45251765847206116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22576753795146942, "epoch": 6.85, "learning_rate": 1.0324839995169666e-06, "loss": 0.2715, "step": 8110, "task_loss": 0.8214532136917114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4171208143234253, "epoch": 6.86, "learning_rate": 1.0264460813911363e-06, "loss": 0.3184, "step": 8111, "task_loss": 0.7386752963066101 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16684280335903168, "epoch": 6.86, "learning_rate": 1.020408163265306e-06, "loss": 0.1925, "step": 8112, "task_loss": 0.31502044200897217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1890266388654709, "epoch": 6.86, "learning_rate": 1.014370245139476e-06, "loss": 0.339, "step": 8113, "task_loss": 0.2939665615558624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2129400223493576, "epoch": 6.86, "learning_rate": 1.0083323270136457e-06, "loss": 0.2876, "step": 8114, "task_loss": 0.7610366940498352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3258943557739258, "epoch": 6.86, "learning_rate": 1.0022944088878156e-06, "loss": 0.3225, "step": 8115, "task_loss": 0.9040721654891968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2057328224182129, "epoch": 6.86, "learning_rate": 9.962564907619854e-07, "loss": 0.3035, "step": 8116, "task_loss": 0.09755367785692215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39392226934432983, "epoch": 6.86, "learning_rate": 9.90218572636155e-07, "loss": 0.3363, "step": 8117, "task_loss": 0.6582924723625183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32428544759750366, "epoch": 6.86, "learning_rate": 9.84180654510325e-07, "loss": 0.2212, "step": 8118, "task_loss": 0.8731036186218262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39777514338493347, "epoch": 6.86, "learning_rate": 9.781427363844947e-07, "loss": 0.3898, "step": 8119, "task_loss": 0.9144240021705627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2189563810825348, "epoch": 6.86, "learning_rate": 9.721048182586645e-07, "loss": 0.3575, "step": 8120, "task_loss": 0.6303703784942627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4214423894882202, "epoch": 6.86, "learning_rate": 9.660669001328342e-07, "loss": 0.3723, "step": 8121, "task_loss": 0.43207645416259766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3185799717903137, "epoch": 6.87, "learning_rate": 9.60028982007004e-07, "loss": 0.338, "step": 8122, "task_loss": 0.5028026700019836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24737197160720825, "epoch": 6.87, "learning_rate": 9.539910638811739e-07, "loss": 0.27, "step": 8123, "task_loss": 0.48548266291618347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.7360237240791321, "epoch": 6.87, "learning_rate": 9.479531457553436e-07, "loss": 0.4271, "step": 8124, "task_loss": 1.0946990251541138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36096176505088806, "epoch": 6.87, "learning_rate": 9.419152276295133e-07, "loss": 0.3247, "step": 8125, "task_loss": 0.9930521249771118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25736305117607117, "epoch": 6.87, "learning_rate": 9.358773095036832e-07, "loss": 0.3138, "step": 8126, "task_loss": 0.7577213644981384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26965492963790894, "epoch": 6.87, "learning_rate": 9.29839391377853e-07, "loss": 0.3231, "step": 8127, "task_loss": 0.6323341727256775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2879280745983124, "epoch": 6.87, "learning_rate": 9.238014732520228e-07, "loss": 0.3288, "step": 8128, "task_loss": 0.11145814508199692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29629087448120117, "epoch": 6.87, "learning_rate": 9.177635551261925e-07, "loss": 0.313, "step": 8129, "task_loss": 0.5196112394332886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3075372874736786, "epoch": 6.87, "learning_rate": 9.117256370003622e-07, "loss": 0.3298, "step": 8130, "task_loss": 0.03525550290942192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1859060823917389, "epoch": 6.87, "learning_rate": 9.056877188745322e-07, "loss": 0.2424, "step": 8131, "task_loss": 0.29234567284584045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2179563343524933, "epoch": 6.87, "learning_rate": 8.996498007487019e-07, "loss": 0.3605, "step": 8132, "task_loss": 0.7765485048294067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39116746187210083, "epoch": 6.87, "learning_rate": 8.936118826228716e-07, "loss": 0.2705, "step": 8133, "task_loss": 0.23488247394561768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2213660627603531, "epoch": 6.88, "learning_rate": 8.875739644970415e-07, "loss": 0.307, "step": 8134, "task_loss": 0.9938311576843262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29961222410202026, "epoch": 6.88, "learning_rate": 8.815360463712112e-07, "loss": 0.3603, "step": 8135, "task_loss": 0.6065184473991394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22712308168411255, "epoch": 6.88, "learning_rate": 8.754981282453811e-07, "loss": 0.2791, "step": 8136, "task_loss": 0.41638967394828796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4927104115486145, "epoch": 6.88, "learning_rate": 8.694602101195508e-07, "loss": 0.3887, "step": 8137, "task_loss": 0.40680167078971863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25799447298049927, "epoch": 6.88, "learning_rate": 8.634222919937206e-07, "loss": 0.2719, "step": 8138, "task_loss": 0.5423984527587891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24627335369586945, "epoch": 6.88, "learning_rate": 8.573843738678904e-07, "loss": 0.4332, "step": 8139, "task_loss": 0.17384549975395203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2732212543487549, "epoch": 6.88, "learning_rate": 8.513464557420601e-07, "loss": 0.2482, "step": 8140, "task_loss": 0.21629886329174042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25000759959220886, "epoch": 6.88, "learning_rate": 8.4530853761623e-07, "loss": 0.2493, "step": 8141, "task_loss": 0.08205603063106537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28253373503685, "epoch": 6.88, "learning_rate": 8.392706194903998e-07, "loss": 0.3391, "step": 8142, "task_loss": 0.39278295636177063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2695361375808716, "epoch": 6.88, "learning_rate": 8.332327013645695e-07, "loss": 0.3528, "step": 8143, "task_loss": 0.5311014652252197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21153134107589722, "epoch": 6.88, "learning_rate": 8.271947832387393e-07, "loss": 0.2707, "step": 8144, "task_loss": 0.4259752035140991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44871073961257935, "epoch": 6.88, "learning_rate": 8.211568651129091e-07, "loss": 0.3129, "step": 8145, "task_loss": 0.6044777035713196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23143355548381805, "epoch": 6.89, "learning_rate": 8.151189469870788e-07, "loss": 0.2787, "step": 8146, "task_loss": 0.6836413145065308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.336157888174057, "epoch": 6.89, "learning_rate": 8.090810288612487e-07, "loss": 0.3044, "step": 8147, "task_loss": 0.1517215520143509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14739662408828735, "epoch": 6.89, "learning_rate": 8.030431107354184e-07, "loss": 0.2912, "step": 8148, "task_loss": 0.26469409465789795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3187646269798279, "epoch": 6.89, "learning_rate": 7.970051926095884e-07, "loss": 0.3068, "step": 8149, "task_loss": 0.3120018541812897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.40963953733444214, "epoch": 6.89, "learning_rate": 7.90967274483758e-07, "loss": 0.3233, "step": 8150, "task_loss": 0.4985775947570801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.45969897508621216, "epoch": 6.89, "learning_rate": 7.849293563579277e-07, "loss": 0.3609, "step": 8151, "task_loss": 1.4935355186462402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2014041543006897, "epoch": 6.89, "learning_rate": 7.788914382320975e-07, "loss": 0.3252, "step": 8152, "task_loss": 0.4154960811138153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25888919830322266, "epoch": 6.89, "learning_rate": 7.728535201062674e-07, "loss": 0.3264, "step": 8153, "task_loss": 0.2965540587902069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2041589915752411, "epoch": 6.89, "learning_rate": 7.668156019804372e-07, "loss": 0.2488, "step": 8154, "task_loss": 0.662189245223999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23405727744102478, "epoch": 6.89, "learning_rate": 7.60777683854607e-07, "loss": 0.2448, "step": 8155, "task_loss": 0.28181442618370056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4149600565433502, "epoch": 6.89, "learning_rate": 7.547397657287768e-07, "loss": 0.3793, "step": 8156, "task_loss": 0.8849920034408569 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2903691232204437, "epoch": 6.89, "learning_rate": 7.487018476029465e-07, "loss": 0.3504, "step": 8157, "task_loss": 1.2153733968734741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19872692227363586, "epoch": 6.9, "learning_rate": 7.426639294771163e-07, "loss": 0.2661, "step": 8158, "task_loss": 0.7905094623565674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.172980397939682, "epoch": 6.9, "learning_rate": 7.366260113512861e-07, "loss": 0.3319, "step": 8159, "task_loss": 0.39536532759666443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.16558168828487396, "epoch": 6.9, "learning_rate": 7.30588093225456e-07, "loss": 0.3049, "step": 8160, "task_loss": 0.22968032956123352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19797174632549286, "epoch": 6.9, "learning_rate": 7.245501750996257e-07, "loss": 0.2757, "step": 8161, "task_loss": 0.12291800230741501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23772065341472626, "epoch": 6.9, "learning_rate": 7.185122569737954e-07, "loss": 0.2503, "step": 8162, "task_loss": 0.5060144662857056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1547754406929016, "epoch": 6.9, "learning_rate": 7.124743388479653e-07, "loss": 0.2775, "step": 8163, "task_loss": 1.0469094514846802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2735685408115387, "epoch": 6.9, "learning_rate": 7.064364207221351e-07, "loss": 0.3206, "step": 8164, "task_loss": 0.78255295753479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2555900514125824, "epoch": 6.9, "learning_rate": 7.003985025963048e-07, "loss": 0.2412, "step": 8165, "task_loss": 0.05103924497961998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2751426696777344, "epoch": 6.9, "learning_rate": 6.943605844704746e-07, "loss": 0.4139, "step": 8166, "task_loss": 0.9180894494056702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25021523237228394, "epoch": 6.9, "learning_rate": 6.883226663446444e-07, "loss": 0.2986, "step": 8167, "task_loss": 0.4587714672088623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28558751940727234, "epoch": 6.9, "learning_rate": 6.822847482188142e-07, "loss": 0.3457, "step": 8168, "task_loss": 0.8578944802284241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3579738736152649, "epoch": 6.9, "learning_rate": 6.762468300929839e-07, "loss": 0.3444, "step": 8169, "task_loss": 0.6977820992469788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23805683851242065, "epoch": 6.91, "learning_rate": 6.702089119671537e-07, "loss": 0.2669, "step": 8170, "task_loss": 0.20424097776412964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39966773986816406, "epoch": 6.91, "learning_rate": 6.641709938413236e-07, "loss": 0.3908, "step": 8171, "task_loss": 0.4964715242385864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23347340524196625, "epoch": 6.91, "learning_rate": 6.581330757154933e-07, "loss": 0.3215, "step": 8172, "task_loss": 0.284160315990448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2014266550540924, "epoch": 6.91, "learning_rate": 6.520951575896631e-07, "loss": 0.2853, "step": 8173, "task_loss": 0.9495064616203308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39157894253730774, "epoch": 6.91, "learning_rate": 6.460572394638329e-07, "loss": 0.3718, "step": 8174, "task_loss": 0.9603783488273621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.542847752571106, "epoch": 6.91, "learning_rate": 6.400193213380027e-07, "loss": 0.3459, "step": 8175, "task_loss": 1.368090033531189 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34035083651542664, "epoch": 6.91, "learning_rate": 6.339814032121725e-07, "loss": 0.3063, "step": 8176, "task_loss": 0.17146940529346466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4257517457008362, "epoch": 6.91, "learning_rate": 6.279434850863423e-07, "loss": 0.3728, "step": 8177, "task_loss": 1.2788125276565552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1344277262687683, "epoch": 6.91, "learning_rate": 6.219055669605121e-07, "loss": 0.3205, "step": 8178, "task_loss": 0.41653403639793396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1098686009645462, "epoch": 6.91, "learning_rate": 6.158676488346818e-07, "loss": 0.2896, "step": 8179, "task_loss": 0.13602645695209503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2409965842962265, "epoch": 6.91, "learning_rate": 6.098297307088516e-07, "loss": 0.2866, "step": 8180, "task_loss": 0.59437495470047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30237001180648804, "epoch": 6.91, "learning_rate": 6.037918125830215e-07, "loss": 0.3329, "step": 8181, "task_loss": 0.6363590955734253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39941102266311646, "epoch": 6.92, "learning_rate": 5.977538944571912e-07, "loss": 0.3526, "step": 8182, "task_loss": 0.9579609036445618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5557575225830078, "epoch": 6.92, "learning_rate": 5.91715976331361e-07, "loss": 0.3706, "step": 8183, "task_loss": 0.9898801445960999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32378292083740234, "epoch": 6.92, "learning_rate": 5.856780582055307e-07, "loss": 0.3228, "step": 8184, "task_loss": 1.1276566982269287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34337693452835083, "epoch": 6.92, "learning_rate": 5.796401400797006e-07, "loss": 0.265, "step": 8185, "task_loss": 0.13268662989139557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.44526389241218567, "epoch": 6.92, "learning_rate": 5.736022219538703e-07, "loss": 0.3624, "step": 8186, "task_loss": 1.0438889265060425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23052160441875458, "epoch": 6.92, "learning_rate": 5.675643038280401e-07, "loss": 0.3471, "step": 8187, "task_loss": 0.6481570601463318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4230203330516815, "epoch": 6.92, "learning_rate": 5.615263857022099e-07, "loss": 0.2717, "step": 8188, "task_loss": 0.675198495388031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2633015811443329, "epoch": 6.92, "learning_rate": 5.554884675763797e-07, "loss": 0.341, "step": 8189, "task_loss": 0.48907262086868286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22429633140563965, "epoch": 6.92, "learning_rate": 5.494505494505495e-07, "loss": 0.2265, "step": 8190, "task_loss": 0.5093376040458679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.22734838724136353, "epoch": 6.92, "learning_rate": 5.434126313247192e-07, "loss": 0.2033, "step": 8191, "task_loss": 0.32123351097106934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.41917455196380615, "epoch": 6.92, "learning_rate": 5.37374713198889e-07, "loss": 0.4234, "step": 8192, "task_loss": 0.7913708686828613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.134751096367836, "epoch": 6.93, "learning_rate": 5.313367950730589e-07, "loss": 0.2947, "step": 8193, "task_loss": 0.475504070520401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.36074793338775635, "epoch": 6.93, "learning_rate": 5.252988769472287e-07, "loss": 0.3021, "step": 8194, "task_loss": 0.3016511797904968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5768441557884216, "epoch": 6.93, "learning_rate": 5.192609588213983e-07, "loss": 0.3446, "step": 8195, "task_loss": 0.3304390609264374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19765472412109375, "epoch": 6.93, "learning_rate": 5.132230406955682e-07, "loss": 0.2683, "step": 8196, "task_loss": 0.5588628053665161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20502425730228424, "epoch": 6.93, "learning_rate": 5.07185122569738e-07, "loss": 0.2534, "step": 8197, "task_loss": 0.6455263495445251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3250991106033325, "epoch": 6.93, "learning_rate": 5.011472044439078e-07, "loss": 0.3222, "step": 8198, "task_loss": 0.4275934100151062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2995171546936035, "epoch": 6.93, "learning_rate": 4.951092863180775e-07, "loss": 0.2826, "step": 8199, "task_loss": 1.3627548217773438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34595057368278503, "epoch": 6.93, "learning_rate": 4.890713681922474e-07, "loss": 0.318, "step": 8200, "task_loss": 0.20615370571613312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24902139604091644, "epoch": 6.93, "learning_rate": 4.830334500664171e-07, "loss": 0.366, "step": 8201, "task_loss": 0.7243157029151917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2457047402858734, "epoch": 6.93, "learning_rate": 4.769955319405869e-07, "loss": 0.2483, "step": 8202, "task_loss": 0.8789578676223755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24658262729644775, "epoch": 6.93, "learning_rate": 4.7095761381475665e-07, "loss": 0.2259, "step": 8203, "task_loss": 0.4583815336227417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2697376012802124, "epoch": 6.93, "learning_rate": 4.649196956889265e-07, "loss": 0.2508, "step": 8204, "task_loss": 0.0597974956035614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2163066416978836, "epoch": 6.94, "learning_rate": 4.5888177756309626e-07, "loss": 0.2752, "step": 8205, "task_loss": 0.6118817925453186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.13691255450248718, "epoch": 6.94, "learning_rate": 4.528438594372661e-07, "loss": 0.2128, "step": 8206, "task_loss": 0.9369524121284485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3042284846305847, "epoch": 6.94, "learning_rate": 4.468059413114358e-07, "loss": 0.287, "step": 8207, "task_loss": 0.4605056643486023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14134693145751953, "epoch": 6.94, "learning_rate": 4.407680231856056e-07, "loss": 0.2645, "step": 8208, "task_loss": 0.01796095073223114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3148330748081207, "epoch": 6.94, "learning_rate": 4.347301050597754e-07, "loss": 0.3119, "step": 8209, "task_loss": 0.7615633010864258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4531823694705963, "epoch": 6.94, "learning_rate": 4.286921869339452e-07, "loss": 0.3313, "step": 8210, "task_loss": 0.37519437074661255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5396493077278137, "epoch": 6.94, "learning_rate": 4.22654268808115e-07, "loss": 0.3325, "step": 8211, "task_loss": 0.9569540023803711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20939406752586365, "epoch": 6.94, "learning_rate": 4.1661635068228475e-07, "loss": 0.3202, "step": 8212, "task_loss": 0.46305450797080994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2282097339630127, "epoch": 6.94, "learning_rate": 4.1057843255645453e-07, "loss": 0.3432, "step": 8213, "task_loss": 0.19440406560897827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.18487074971199036, "epoch": 6.94, "learning_rate": 4.0454051443062436e-07, "loss": 0.223, "step": 8214, "task_loss": 0.20114684104919434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.39072197675704956, "epoch": 6.94, "learning_rate": 3.985025963047942e-07, "loss": 0.3168, "step": 8215, "task_loss": 1.0516399145126343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2245337814092636, "epoch": 6.94, "learning_rate": 3.9246467817896386e-07, "loss": 0.1974, "step": 8216, "task_loss": 0.21339841187000275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2399439513683319, "epoch": 6.95, "learning_rate": 3.864267600531337e-07, "loss": 0.3427, "step": 8217, "task_loss": 1.562753677368164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2244424819946289, "epoch": 6.95, "learning_rate": 3.803888419273035e-07, "loss": 0.3184, "step": 8218, "task_loss": 1.1775087118148804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14690548181533813, "epoch": 6.95, "learning_rate": 3.7435092380147324e-07, "loss": 0.2773, "step": 8219, "task_loss": 0.2650884985923767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34558311104774475, "epoch": 6.95, "learning_rate": 3.6831300567564307e-07, "loss": 0.3815, "step": 8220, "task_loss": 0.752966046333313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2394636571407318, "epoch": 6.95, "learning_rate": 3.6227508754981285e-07, "loss": 0.2903, "step": 8221, "task_loss": 1.2829264402389526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27834123373031616, "epoch": 6.95, "learning_rate": 3.562371694239826e-07, "loss": 0.3443, "step": 8222, "task_loss": 1.185127854347229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27864736318588257, "epoch": 6.95, "learning_rate": 3.501992512981524e-07, "loss": 0.2702, "step": 8223, "task_loss": 0.4678493142127991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14460159838199615, "epoch": 6.95, "learning_rate": 3.441613331723222e-07, "loss": 0.2653, "step": 8224, "task_loss": 0.0807252898812294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2193092554807663, "epoch": 6.95, "learning_rate": 3.3812341504649196e-07, "loss": 0.3063, "step": 8225, "task_loss": 0.03410137817263603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2172752022743225, "epoch": 6.95, "learning_rate": 3.320854969206618e-07, "loss": 0.327, "step": 8226, "task_loss": 0.32131847739219666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3825490474700928, "epoch": 6.95, "learning_rate": 3.2604757879483156e-07, "loss": 0.3284, "step": 8227, "task_loss": 0.6248998045921326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.14463874697685242, "epoch": 6.95, "learning_rate": 3.2000966066900134e-07, "loss": 0.1652, "step": 8228, "task_loss": 0.22989198565483093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.23500677943229675, "epoch": 6.96, "learning_rate": 3.1397174254317117e-07, "loss": 0.3311, "step": 8229, "task_loss": 0.1440393626689911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20343837141990662, "epoch": 6.96, "learning_rate": 3.079338244173409e-07, "loss": 0.3376, "step": 8230, "task_loss": 0.5078994631767273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5268635749816895, "epoch": 6.96, "learning_rate": 3.018959062915107e-07, "loss": 0.3108, "step": 8231, "task_loss": 1.1546776294708252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24569375813007355, "epoch": 6.96, "learning_rate": 2.958579881656805e-07, "loss": 0.2749, "step": 8232, "task_loss": 0.5749136209487915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.207976832985878, "epoch": 6.96, "learning_rate": 2.898200700398503e-07, "loss": 0.2211, "step": 8233, "task_loss": 0.6242374181747437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.5050680637359619, "epoch": 6.96, "learning_rate": 2.8378215191402006e-07, "loss": 0.3447, "step": 8234, "task_loss": 1.1040611267089844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.446163147687912, "epoch": 6.96, "learning_rate": 2.7774423378818983e-07, "loss": 0.3423, "step": 8235, "task_loss": 0.624466061592102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.17755959928035736, "epoch": 6.96, "learning_rate": 2.717063156623596e-07, "loss": 0.2896, "step": 8236, "task_loss": 0.6469965577125549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3547624945640564, "epoch": 6.96, "learning_rate": 2.6566839753652944e-07, "loss": 0.278, "step": 8237, "task_loss": 0.7493686676025391 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4411579966545105, "epoch": 6.96, "learning_rate": 2.5963047941069916e-07, "loss": 0.2988, "step": 8238, "task_loss": 0.424460768699646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20929446816444397, "epoch": 6.96, "learning_rate": 2.53592561284869e-07, "loss": 0.2772, "step": 8239, "task_loss": 0.33165857195854187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3273460268974304, "epoch": 6.96, "learning_rate": 2.4755464315903877e-07, "loss": 0.3785, "step": 8240, "task_loss": 0.6233994960784912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4565318822860718, "epoch": 6.97, "learning_rate": 2.4151672503320855e-07, "loss": 0.3739, "step": 8241, "task_loss": 0.30213606357574463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1870637834072113, "epoch": 6.97, "learning_rate": 2.3547880690737833e-07, "loss": 0.1816, "step": 8242, "task_loss": 0.1939718872308731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.21335430443286896, "epoch": 6.97, "learning_rate": 2.2944088878154813e-07, "loss": 0.2959, "step": 8243, "task_loss": 0.3067457377910614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2173275649547577, "epoch": 6.97, "learning_rate": 2.234029706557179e-07, "loss": 0.2543, "step": 8244, "task_loss": 0.6393483281135559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.32693368196487427, "epoch": 6.97, "learning_rate": 2.173650525298877e-07, "loss": 0.2959, "step": 8245, "task_loss": 0.6045393943786621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.20965278148651123, "epoch": 6.97, "learning_rate": 2.113271344040575e-07, "loss": 0.3175, "step": 8246, "task_loss": 0.35388824343681335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.24290627241134644, "epoch": 6.97, "learning_rate": 2.0528921627822726e-07, "loss": 0.254, "step": 8247, "task_loss": 0.6450813412666321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2106490284204483, "epoch": 6.97, "learning_rate": 1.992512981523971e-07, "loss": 0.3244, "step": 8248, "task_loss": 0.5154479146003723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.31379470229148865, "epoch": 6.97, "learning_rate": 1.9321338002656684e-07, "loss": 0.2729, "step": 8249, "task_loss": 0.6052679419517517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.46341806650161743, "epoch": 6.97, "learning_rate": 1.8717546190073662e-07, "loss": 0.4013, "step": 8250, "task_loss": 0.9756036400794983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1569734513759613, "epoch": 6.97, "learning_rate": 1.8113754377490642e-07, "loss": 0.2719, "step": 8251, "task_loss": 0.2124776840209961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2888219356536865, "epoch": 6.97, "learning_rate": 1.750996256490762e-07, "loss": 0.4075, "step": 8252, "task_loss": 0.9251034259796143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.30754774808883667, "epoch": 6.98, "learning_rate": 1.6906170752324598e-07, "loss": 0.3086, "step": 8253, "task_loss": 0.7179510593414307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19903141260147095, "epoch": 6.98, "learning_rate": 1.6302378939741578e-07, "loss": 0.3124, "step": 8254, "task_loss": 0.5098888874053955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19829589128494263, "epoch": 6.98, "learning_rate": 1.5698587127158559e-07, "loss": 0.2823, "step": 8255, "task_loss": 0.48862066864967346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2619982957839966, "epoch": 6.98, "learning_rate": 1.5094795314575536e-07, "loss": 0.2845, "step": 8256, "task_loss": 0.4389473497867584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2634032070636749, "epoch": 6.98, "learning_rate": 1.4491003501992514e-07, "loss": 0.2509, "step": 8257, "task_loss": 0.10000568628311157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.28403204679489136, "epoch": 6.98, "learning_rate": 1.3887211689409492e-07, "loss": 0.289, "step": 8258, "task_loss": 0.8160184025764465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4550357758998871, "epoch": 6.98, "learning_rate": 1.3283419876826472e-07, "loss": 0.4144, "step": 8259, "task_loss": 2.077317237854004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3712099492549896, "epoch": 6.98, "learning_rate": 1.267962806424345e-07, "loss": 0.3141, "step": 8260, "task_loss": 0.5789496302604675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3545021414756775, "epoch": 6.98, "learning_rate": 1.2075836251660427e-07, "loss": 0.3563, "step": 8261, "task_loss": 0.500425398349762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4160536229610443, "epoch": 6.98, "learning_rate": 1.1472044439077406e-07, "loss": 0.368, "step": 8262, "task_loss": 0.586093544960022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3602394759654999, "epoch": 6.98, "learning_rate": 1.0868252626494385e-07, "loss": 0.286, "step": 8263, "task_loss": 0.18316397070884705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4232527017593384, "epoch": 6.99, "learning_rate": 1.0264460813911363e-07, "loss": 0.3704, "step": 8264, "task_loss": 0.6587258577346802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.27059102058410645, "epoch": 6.99, "learning_rate": 9.660669001328342e-08, "loss": 0.3826, "step": 8265, "task_loss": 0.9369746446609497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2870047986507416, "epoch": 6.99, "learning_rate": 9.056877188745321e-08, "loss": 0.2667, "step": 8266, "task_loss": 0.3356179893016815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2788523733615875, "epoch": 6.99, "learning_rate": 8.453085376162299e-08, "loss": 0.297, "step": 8267, "task_loss": 1.1274642944335938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.29962587356567383, "epoch": 6.99, "learning_rate": 7.849293563579279e-08, "loss": 0.3552, "step": 8268, "task_loss": 1.5454800128936768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.10299444198608398, "epoch": 6.99, "learning_rate": 7.245501750996257e-08, "loss": 0.2483, "step": 8269, "task_loss": 0.015107410028576851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.3077191114425659, "epoch": 6.99, "learning_rate": 6.641709938413236e-08, "loss": 0.3261, "step": 8270, "task_loss": 0.05418427661061287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.43821582198143005, "epoch": 6.99, "learning_rate": 6.037918125830214e-08, "loss": 0.3147, "step": 8271, "task_loss": 1.116099238395691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.4122049808502197, "epoch": 6.99, "learning_rate": 5.434126313247193e-08, "loss": 0.3768, "step": 8272, "task_loss": 0.537239134311676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1816696673631668, "epoch": 6.99, "learning_rate": 4.830334500664171e-08, "loss": 0.2776, "step": 8273, "task_loss": 0.3666461408138275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.25904738903045654, "epoch": 6.99, "learning_rate": 4.2265426880811495e-08, "loss": 0.2624, "step": 8274, "task_loss": 0.2622477114200592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.253604531288147, "epoch": 6.99, "learning_rate": 3.6227508754981285e-08, "loss": 0.2714, "step": 8275, "task_loss": 0.7127410173416138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34959331154823303, "epoch": 7.0, "learning_rate": 3.018959062915107e-08, "loss": 0.3268, "step": 8276, "task_loss": 1.0597003698349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.2063511312007904, "epoch": 7.0, "learning_rate": 2.4151672503320856e-08, "loss": 0.3423, "step": 8277, "task_loss": 0.9842402338981628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.26667875051498413, "epoch": 7.0, "learning_rate": 1.8113754377490642e-08, "loss": 0.3135, "step": 8278, "task_loss": 0.8645683526992798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.1573685109615326, "epoch": 7.0, "learning_rate": 1.2075836251660428e-08, "loss": 0.2557, "step": 8279, "task_loss": 0.0957130491733551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.34253910183906555, "epoch": 7.0, "learning_rate": 6.037918125830214e-09, "loss": 0.2936, "step": 8280, "task_loss": 0.1565895974636078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.3055917334801628, "compression/movement_sparsity/model_sparsity": 0.2950937173249859, "compression_loss": 0.0, "distillation_loss": 0.19714051485061646, "epoch": 7.0, "learning_rate": 0.0, "loss": 0.32, "step": 8281, "task_loss": 0.2995016574859619 }, { "epoch": 7.0, "step": 8281, "total_flos": 4.176434448946852e+19, "train_loss": 12.387632197956075, "train_runtime": 41093.8385, "train_samples_per_second": 12.903, "train_steps_per_second": 0.202 } ], "max_steps": 8281, "num_train_epochs": 7, "total_flos": 4.176434448946852e+19, "trial_name": null, "trial_params": null }